Example 1 with GenTezProcContext

Use of org.apache.hadoop.hive.ql.parse.GenTezProcContext in project hive by apache.

The class ReduceSinkMapJoinProc, method process.

/* (non-Javadoc)
   * This processor addresses the RS-MJ case that occurs in Tez on the small/hash
   * table side of things. The work that the RS will be a part of must be connected
   * to the MJ work via a broadcast edge.
   * We should not walk down the tree when we encounter this pattern, because
   * the type of work (map work or reduce work) needs to be determined
   * on the basis of the big table side: it may be map work (no need for a shuffle)
   * or reduce work.
   */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException {
    GenTezProcContext context = (GenTezProcContext) procContext;
    MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
    // remember the original parent list before we start modifying it.
    if (!context.mapJoinParentMap.containsKey(mapJoinOp)) {
        List<Operator<?>> parents = new ArrayList<Operator<?>>(mapJoinOp.getParentOperators());
        context.mapJoinParentMap.put(mapJoinOp, parents);
    }
    boolean isBigTable = stack.size() < 2 || !(stack.get(stack.size() - 2) instanceof ReduceSinkOperator);
    ReduceSinkOperator parentRS = null;
    if (!isBigTable) {
        parentRS = (ReduceSinkOperator) stack.get(stack.size() - 2);
        // For dynamic partitioned hash join, the big table will also be coming from a ReduceSinkOperator
        // Check for this condition.
        // TODO: use indexOf(), or parentRS.getTag()?
        isBigTable = (mapJoinOp.getParentOperators().indexOf(parentRS) == mapJoinOp.getConf().getPosBigTable());
    }
    if (mapJoinOp.getConf().isDynamicPartitionHashJoin() && !context.mapJoinToUnprocessedSmallTableReduceSinks.containsKey(mapJoinOp)) {
        // Initialize set of unprocessed small tables
        Set<ReduceSinkOperator> rsSet = Sets.newIdentityHashSet();
        for (int pos = 0; pos < mapJoinOp.getParentOperators().size(); ++pos) {
            if (pos == mapJoinOp.getConf().getPosBigTable()) {
                continue;
            }
            rsSet.add((ReduceSinkOperator) mapJoinOp.getParentOperators().get(pos));
        }
        context.mapJoinToUnprocessedSmallTableReduceSinks.put(mapJoinOp, rsSet);
    }
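    // Big-table side: nothing to connect here. Record the map join and let the
    // walk continue, so the work for this branch is created as usual from the big table.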
    if (isBigTable) {
        context.currentMapJoinOperators.add(mapJoinOp);
        return null;
    }
    context.preceedingWork = null;
    context.currentRootOperator = null;
    return processReduceSinkToHashJoin(parentRS, mapJoinOp, context);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator), ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator), Operator(org.apache.hadoop.hive.ql.exec.Operator), HashTableDummyOperator(org.apache.hadoop.hive.ql.exec.HashTableDummyOperator), ArrayList(java.util.ArrayList), GenTezProcContext(org.apache.hadoop.hive.ql.parse.GenTezProcContext)
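
For context, a processor like the one above does not run on its own; it is registered against an operator-name rule and invoked by Hive's rule-based graph walk. Below is a minimal sketch of that wiring, assuming the pre-Hive-4 names of the org.apache.hadoop.hive.ql.lib interfaces; the GenTezProcContext construction and the list of root nodes are left to the surrounding compiler code.

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc;
import org.apache.hadoop.hive.ql.parse.GenTezProcContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class RsMapJoinWalkSketch {

    static void walk(GenTezProcContext procCtx, List<Node> topNodes) throws SemanticException {
        // Fire ReduceSinkMapJoinProc whenever the walk reaches a MapJoinOperator;
        // the processor decides from the stack whether it arrived on the big-table
        // or the small/hash-table side (see process() above).
        Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
        opRules.put(new RuleRegExp("RS-MJ", MapJoinOperator.getOperatorName() + "%"),
            new ReduceSinkMapJoinProc());
        // No default processor in this sketch; unmatched operators are just walked past.
        Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
        GraphWalker walker = new DefaultGraphWalker(disp);
        walker.startWalking(topNodes, new HashMap<Node, Object>());
    }
}

The actual TezCompiler registers many more rules on the same kind of dispatcher and uses a specialized walker, but the registration shape is the same.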

Example 2 with GenTezProcContext

Use of org.apache.hadoop.hive.ql.parse.GenTezProcContext in project hive by apache.

The class MergeJoinProc, method process.

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    GenTezProcContext context = (GenTezProcContext) procCtx;
    CommonMergeJoinOperator mergeJoinOp = (CommonMergeJoinOperator) nd;
    if (stack.size() < 2) {
        // safety check before reading stack.get(stack.size() - 2) below, although it is
        // very unlikely that the stack holds only this CommonMergeJoinOperator.
        context.currentMergeJoinOperator = mergeJoinOp;
        return null;
    }
    TezWork tezWork = context.currentTask.getWork();
    @SuppressWarnings("unchecked") Operator<? extends OperatorDesc> parentOp = (Operator<? extends OperatorDesc>) ((stack.get(stack.size() - 2)));
    // we need to set the merge work that has been created as part of the dummy store walk. If a
    // merge work already exists for this merge join operator, add the dummy store work to the
    // merge work. Else create a merge work, add above work to the merge work
    MergeJoinWork mergeWork = null;
    if (context.opMergeJoinWorkMap.containsKey(mergeJoinOp)) {
        // we already have the merge work corresponding to this merge join operator
        mergeWork = context.opMergeJoinWorkMap.get(mergeJoinOp);
    } else {
        mergeWork = new MergeJoinWork();
        tezWork.add(mergeWork);
        context.opMergeJoinWorkMap.put(mergeJoinOp, mergeWork);
    }
    if (!(stack.get(stack.size() - 2) instanceof DummyStoreOperator)) {
        /* this may happen in a case like the following, where the branch being
       walked reaches the merge join directly rather than through a DummyStoreOperator:
       TS[0], FIL[26], SEL[2], DUMMY_STORE[30], MERGEJOIN[29]
                                               /
       TS[3], FIL[27], SEL[5], ---------------
       */
        context.currentMergeJoinOperator = mergeJoinOp;
        mergeWork.setTag(mergeJoinOp.getTagForOperator(parentOp));
        return null;
    }
    // Guaranteed to be just 1 because each DummyStoreOperator can be part of only one work.
    BaseWork parentWork = context.childToWorkMap.get(parentOp).get(0);
    mergeWork.addMergedWork(null, parentWork, context.leafOperatorToFollowingWork);
    mergeWork.setMergeJoinOperator(mergeJoinOp);
    tezWork.setVertexType(mergeWork, VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
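    // Splice mergeWork into parentWork's place in the DAG: re-create each incoming
    // and outgoing edge of parentWork against mergeWork, then drop parentWork below.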
    for (BaseWork grandParentWork : tezWork.getParents(parentWork)) {
        TezEdgeProperty edgeProp = tezWork.getEdgeProperty(grandParentWork, parentWork);
        tezWork.disconnect(grandParentWork, parentWork);
        tezWork.connect(grandParentWork, mergeWork, edgeProp);
    }
    for (BaseWork childWork : tezWork.getChildren(parentWork)) {
        TezEdgeProperty edgeProp = tezWork.getEdgeProperty(parentWork, childWork);
        tezWork.disconnect(parentWork, childWork);
        tezWork.connect(mergeWork, childWork, edgeProp);
    }
    tezWork.remove(parentWork);
    DummyStoreOperator dummyOp = (DummyStoreOperator) (stack.get(stack.size() - 2));
    parentWork.setTag(mergeJoinOp.getTagForOperator(dummyOp));
    mergeJoinOp.getParentOperators().remove(dummyOp);
    dummyOp.getChildOperators().clear();
    return true;
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator), DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator), Operator(org.apache.hadoop.hive.ql.exec.Operator), MergeJoinWork(org.apache.hadoop.hive.ql.plan.MergeJoinWork), TezEdgeProperty(org.apache.hadoop.hive.ql.plan.TezEdgeProperty), GenTezProcContext(org.apache.hadoop.hive.ql.parse.GenTezProcContext), OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc), BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork), TezWork(org.apache.hadoop.hive.ql.plan.TezWork)
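
The two edge loops and the remove() call above are an instance of a general DAG splice: every edge that touched parentWork is re-created against mergeWork, preserving its edge property, and parentWork is then dropped. A self-contained sketch of that pattern follows; the Dag class and all its names are made up for illustration and stand in for TezWork, which additionally tracks vertex types and richer edge properties.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Hypothetical minimal DAG with labeled edges: only what the splice needs,
// namely parents/children lookup, connect, and disconnect.
final class Dag<V, E> {
    private final Map<V, Map<V, E>> out = new LinkedHashMap<>(); // src -> (dst -> label)
    private final Map<V, Map<V, E>> in = new LinkedHashMap<>();  // dst -> (src -> label)

    void add(V v) {
        out.computeIfAbsent(v, k -> new LinkedHashMap<>());
        in.computeIfAbsent(v, k -> new LinkedHashMap<>());
    }

    void connect(V a, V b, E label) {
        add(a);
        add(b);
        out.get(a).put(b, label);
        in.get(b).put(a, label);
    }

    void disconnect(V a, V b) {
        out.get(a).remove(b);
        in.get(b).remove(a);
    }

    // Copies, so callers can mutate the graph while iterating.
    List<V> parents(V v) { return new ArrayList<>(in.get(v).keySet()); }
    List<V> children(V v) { return new ArrayList<>(out.get(v).keySet()); }
    E edge(V a, V b) { return out.get(a).get(b); }

    void remove(V v) {
        for (V p : parents(v)) disconnect(p, v);
        for (V c : children(v)) disconnect(v, c);
        out.remove(v);
        in.remove(v);
    }

    // Re-create every edge that touched 'old' against 'replacement', preserving
    // the edge labels, then drop 'old' -- the same shape as the loops in MergeJoinProc.
    void splice(V old, V replacement) {
        for (V p : parents(old)) {
            E label = edge(p, old);
            disconnect(p, old);
            connect(p, replacement, label);
        }
        for (V c : children(old)) {
            E label = edge(old, c);
            disconnect(old, c);
            connect(replacement, c, label);
        }
        remove(old);
    }
}

public class SpliceDemo {
    public static void main(String[] args) {
        Dag<String, String> dag = new Dag<>();
        dag.connect("map1", "reduceA", "SIMPLE_EDGE");
        dag.connect("reduceA", "reduceB", "SIMPLE_EDGE");
        dag.splice("reduceA", "mergeWork");
        System.out.println(dag.parents("mergeWork"));  // [map1]
        System.out.println(dag.children("mergeWork")); // [reduceB]
    }
}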

Aggregations

Operator (org.apache.hadoop.hive.ql.exec.Operator): 2
GenTezProcContext (org.apache.hadoop.hive.ql.parse.GenTezProcContext): 2
ArrayList (java.util.ArrayList): 1
CommonMergeJoinOperator (org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator): 1
DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator): 1
HashTableDummyOperator (org.apache.hadoop.hive.ql.exec.HashTableDummyOperator): 1
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 1
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 1
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 1
BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork): 1
MergeJoinWork (org.apache.hadoop.hive.ql.plan.MergeJoinWork): 1
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 1
TezEdgeProperty (org.apache.hadoop.hive.ql.plan.TezEdgeProperty): 1
TezWork (org.apache.hadoop.hive.ql.plan.TezWork): 1