
Example 26 with MapJoinOperator

Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

From the class ConvertJoinMapJoin, method convertJoinDynamicPartitionedHashJoin.

private boolean convertJoinDynamicPartitionedHashJoin(JoinOperator joinOp, OptimizeTezProcContext context) throws SemanticException {
    // Attempt dynamic partitioned hash join
    // Since we don't have big table index yet, must start with estimate of numReducers
    int numReducers = estimateNumBuckets(joinOp, false);
    LOG.info("Try dynamic partitioned hash join with estimated " + numReducers + " reducers");
    int bigTablePos = getMapJoinConversionPos(joinOp, context, numReducers, false,
            context.conf.getLongVar(HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD),
            false);
    if (bigTablePos >= 0) {
        // Now that we have the big table index, get real numReducers value based on big table RS
        ReduceSinkOperator bigTableParentRS = (ReduceSinkOperator) (joinOp.getParentOperators().get(bigTablePos));
        numReducers = bigTableParentRS.getConf().getNumReducers();
        LOG.debug("Real big table reducers = " + numReducers);
        MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePos, false);
        if (mapJoinOp != null) {
            LOG.info("Selected dynamic partitioned hash join");
            mapJoinOp.getConf().setDynamicPartitionHashJoin(true);
            // Set OpTraits for dynamically partitioned hash join:
            // bucketColNames: Re-use previous joinOp's bucketColNames. Parent operators should be
            //   reduce sink, which should have bucket columns based on the join keys.
            // numBuckets: set to number of reducers
            // sortCols: This is an unsorted join - no sort cols
            OpTraits opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), numReducers, null, joinOp.getOpTraits().getNumReduceSinks());
            mapJoinOp.setOpTraits(opTraits);
            mapJoinOp.setStatistics(joinOp.getStatistics());
            // propagate this change till the next RS
            for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
                setAllChildrenTraits(childOp, mapJoinOp.getOpTraits());
            }
            return true;
        }
    }
    return false;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) OpTraits(org.apache.hadoop.hive.ql.plan.OpTraits) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)
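
The helper setAllChildrenTraits called above is not shown in this example. Below is a minimal sketch of what such trait propagation "till the next RS" might look like; it is hypothetical, not the actual Hive implementation, and it assumes OpTraits exposes getNumBuckets() and getSortCols() accessors matching the four-argument constructor used above.

import java.util.List;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.plan.OpTraits;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

public final class TraitPropagationSketch {

    // Copy the map join's OpTraits onto each descendant until a ReduceSinkOperator is
    // reached, since a reduce sink marks the next shuffle boundary with its own traits.
    static void propagateTraits(Operator<? extends OperatorDesc> op, OpTraits traits) {
        if (op instanceof ReduceSinkOperator) {
            return;
        }
        op.setOpTraits(new OpTraits(traits.getBucketColNames(), traits.getNumBuckets(),
                traits.getSortCols(), traits.getNumReduceSinks()));
        List<Operator<? extends OperatorDesc>> children = op.getChildOperators();
        if (children == null) {
            return;
        }
        for (Operator<? extends OperatorDesc> child : children) {
            propagateTraits(child, traits);
        }
    }
}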

Example 27 with MapJoinOperator

Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

From the class MapJoinProcessor, method convertJoinOpMapJoinOp.

public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf, JoinOperator op,
        boolean leftInputJoin, String[] baseSrc, List<String> mapAliases, int mapJoinPos,
        boolean noCheckOuterJoin, boolean adjustParentsChildren) throws SemanticException {
    MapJoinDesc mapJoinDescriptor = getMapJoinDesc(hconf, op, leftInputJoin, baseSrc, mapAliases,
            mapJoinPos, noCheckOuterJoin, adjustParentsChildren);
    // reduce sink row resolver used to generate map join op
    RowSchema outputRS = op.getSchema();
    MapJoinOperator mapJoinOp = (MapJoinOperator) OperatorFactory.getAndMakeChild(
            op.getCompilationOpContext(), mapJoinDescriptor,
            new RowSchema(outputRS.getSignature()), op.getParentOperators());
    mapJoinOp.getConf().setReversedExprs(op.getConf().getReversedExprs());
    Map<String, ExprNodeDesc> colExprMap = op.getColumnExprMap();
    mapJoinOp.setColumnExprMap(colExprMap);
    List<Operator<? extends OperatorDesc>> childOps = op.getChildOperators();
    for (Operator<? extends OperatorDesc> childOp : childOps) {
        childOp.replaceParent(op, mapJoinOp);
    }
    mapJoinOp.setPosToAliasMap(op.getPosToAliasMap());
    mapJoinOp.setChildOperators(childOps);
    op.setChildOperators(null);
    op.setParentOperators(null);
    return mapJoinOp;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
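
The key effect of convertJoinOpMapJoinOp is the re-parenting: each former child of the JoinOperator is re-pointed at the new MapJoinOperator and the old join is detached. Below is a small sanity-check sketch of that invariant; the class and method names are hypothetical, and it uses only accessors that appear in the snippet above.

import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;

public final class MapJoinRewiringCheck {

    // Hypothetical post-condition check, not part of Hive: after conversion the old
    // JoinOperator should be detached and every inherited child should point back at
    // the MapJoinOperator.
    static boolean isFullyRewired(JoinOperator oldJoin, MapJoinOperator mapJoin) {
        if (oldJoin.getChildOperators() != null || oldJoin.getParentOperators() != null) {
            return false;
        }
        for (Operator<? extends OperatorDesc> child : mapJoin.getChildOperators()) {
            if (!child.getParentOperators().contains(mapJoin)) {
                return false;
            }
        }
        return true;
    }
}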

Example 28 with MapJoinOperator

Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

From the class SortedMergeBucketMapjoinProc, method process.

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    if (nd instanceof SMBMapJoinOperator) {
        return null;
    }
    MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
    SortBucketJoinProcCtx smbJoinContext = (SortBucketJoinProcCtx) procCtx;
    boolean convert = canConvertBucketMapJoinToSMBJoin(mapJoinOp, stack, smbJoinContext, nodeOutputs);
    // Throw an error if the user asked for sort merge bucketed mapjoin to be enforced
    // and a sort merge bucketed mapjoin cannot be performed.
    if (!convert && pGraphContext.getConf().getBoolVar(HiveConf.ConfVars.HIVEENFORCESORTMERGEBUCKETMAPJOIN)) {
        throw new SemanticException(ErrorMsg.SORTMERGE_MAPJOIN_FAILED.getMsg());
    }
    if (convert) {
        convertBucketMapJoinToSMBJoin(mapJoinOp, smbJoinContext);
    }
    return null;
}
Also used : SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
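
The enforcement branch above is driven by HiveConf.ConfVars.HIVEENFORCESORTMERGEBUCKETMAPJOIN: if the flag is on and the bucket map join cannot be upgraded, the processor fails with SORTMERGE_MAPJOIN_FAILED instead of silently falling back. A minimal sketch of toggling that flag programmatically, assuming the standard HiveConf boolean setter:

import org.apache.hadoop.hive.conf.HiveConf;

public final class EnforceSmbJoinExample {
    public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // With this flag set, a bucket map join that cannot be converted to a
        // sort-merge bucket map join becomes a SemanticException at compile time.
        conf.setBoolVar(HiveConf.ConfVars.HIVEENFORCESORTMERGEBUCKETMAPJOIN, true);
    }
}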

Example 29 with MapJoinOperator

Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

From the class AbstractMapJoin, method setupBenchmarkImplementation.

protected static MapJoinOperator setupBenchmarkImplementation(MapJoinTestImplementation mapJoinImplementation, MapJoinTestDescription testDesc, MapJoinTestData testData) throws Exception {
    // UNDONE: Parameterize for implementation variation?
    MapJoinDesc mapJoinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc);
    final boolean isVectorOutput = isVectorOutput(mapJoinImplementation);
    // This collector is just a row counter.
    Operator<? extends OperatorDesc> testCollectorOperator = (!isVectorOutput ? new CountCollectorTestOperator() : new CountVectorCollectorTestOperator());
    MapJoinOperator operator = MapJoinTestConfig.createMapJoinImplementation(mapJoinImplementation, testDesc, testCollectorOperator, testData, mapJoinDesc);
    return operator;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) CountCollectorTestOperator(org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator) CountVectorCollectorTestOperator(org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator)

Example 30 with MapJoinOperator

Use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

From the class ConvertJoinMapJoin, method convertJoinMapJoin.

/*
   * Once we have decided on the map join, the tree would transform from
   *
   *        |                   |
   *       Join               MapJoin
   *       / \                /   \
   *     RS   RS   --->     RS    TS (big table)
   *    /      \           /
   *   TS       TS        TS (small table)
   *
   * for tez.
   */
public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcContext context,
        int bigTablePosition, boolean removeReduceSink) throws SemanticException {
    // Bail out on a mux operator: it currently masks the emit keys
    // of the constituent reduce sinks.
    for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
        if (parentOp instanceof MuxOperator) {
            return null;
        }
    }
    // At this point we can safely convert the join to a map join.
    MapJoinOperator mapJoinOp = MapJoinProcessor.convertJoinOpMapJoinOp(context.conf, joinOp,
            joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(),
            joinOp.getConf().getMapAliases(), bigTablePosition, true, removeReduceSink);
    mapJoinOp.getConf().setHybridHashJoin(HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN));
    List<ExprNodeDesc> joinExprs = mapJoinOp.getConf().getKeys().values().iterator().next();
    if (joinExprs.size() == 0) {
        // In case of cross join, we disable hybrid grace hash join
        mapJoinOp.getConf().setHybridHashJoin(false);
    }
    Operator<? extends OperatorDesc> parentBigTableOp = mapJoinOp.getParentOperators().get(bigTablePosition);
    if (parentBigTableOp instanceof ReduceSinkOperator) {
        Operator<?> parentSelectOpOfBigTableOp = parentBigTableOp.getParentOperators().get(0);
        if (removeReduceSink) {
            for (Operator<?> p : parentBigTableOp.getParentOperators()) {
                // We might have generated a dynamic partition operator chain. Since
                // we're removing the reduce sink, we need to remove that too.
                Set<Operator<?>> dynamicPartitionOperators = new HashSet<Operator<?>>();
                Map<Operator<?>, AppMasterEventOperator> opEventPairs = new HashMap<>();
                for (Operator<?> c : p.getChildOperators()) {
                    AppMasterEventOperator event = findDynamicPartitionBroadcast(c);
                    if (event != null) {
                        dynamicPartitionOperators.add(c);
                        opEventPairs.put(c, event);
                    }
                }
                for (Operator<?> c : dynamicPartitionOperators) {
                    if (context.pruningOpsRemovedByPriorOpt.isEmpty() || !context.pruningOpsRemovedByPriorOpt.contains(opEventPairs.get(c))) {
                        p.removeChild(c);
                        // at this point we've found the fork in the op pipeline that has the pruning as a child plan.
                        LOG.info("Disabling dynamic pruning for: " + ((DynamicPruningEventDesc) opEventPairs.get(c).getConf()).getTableScan().getName() + ". Need to be removed together with reduce sink");
                    }
                }
                for (Operator<?> op : dynamicPartitionOperators) {
                    context.pruningOpsRemovedByPriorOpt.add(opEventPairs.get(op));
                }
            }
            mapJoinOp.getParentOperators().remove(bigTablePosition);
            if (!(mapJoinOp.getParentOperators().contains(parentBigTableOp.getParentOperators().get(0)))) {
                mapJoinOp.getParentOperators().add(bigTablePosition, parentBigTableOp.getParentOperators().get(0));
            }
            parentBigTableOp.getParentOperators().get(0).removeChild(parentBigTableOp);
        }
        for (Operator<? extends OperatorDesc> op : mapJoinOp.getParentOperators()) {
            if (!(op.getChildOperators().contains(mapJoinOp))) {
                op.getChildOperators().add(mapJoinOp);
            }
            op.getChildOperators().remove(joinOp);
        }
        // Remove the semijoin branch if there is any: it can potentially create a task level
        // cycle with the hash join, except when it is a dynamically partitioned hash
        // join which takes place in a separate task.
        if (context.parseContext.getRsToSemiJoinBranchInfo().size() > 0 && removeReduceSink) {
            removeCycleCreatingSemiJoinOps(mapJoinOp, parentSelectOpOfBigTableOp, context.parseContext);
        }
    }
    return mapJoinOp;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) MuxOperator(org.apache.hadoop.hive.ql.exec.MuxOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) HashMap(java.util.HashMap) DynamicPruningEventDesc(org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) HashSet(java.util.HashSet)
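
findDynamicPartitionBroadcast is not shown in this example. Below is a hedged sketch of such a lookup; it is hypothetical and simply reflects how the result is used above: it must return the AppMasterEventOperator carrying a DynamicPruningEventDesc somewhere beneath the given operator, or null if there is none.

import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc;

public final class DynamicPruningLookupSketch {

    // Walk the subtree under 'start' and return the first AppMasterEventOperator whose
    // descriptor is a DynamicPruningEventDesc (a dynamic partition pruning broadcast).
    static AppMasterEventOperator findPruningBroadcast(Operator<?> start) {
        if (start instanceof AppMasterEventOperator
                && start.getConf() instanceof DynamicPruningEventDesc) {
            return (AppMasterEventOperator) start;
        }
        if (start.getChildOperators() == null) {
            return null;
        }
        for (Operator<?> child : start.getChildOperators()) {
            AppMasterEventOperator found = findPruningBroadcast(child);
            if (found != null) {
                return found;
            }
        }
        return null;
    }
}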

Aggregations

MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 41 usages
Operator (org.apache.hadoop.hive.ql.exec.Operator): 22 usages
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 22 usages
ArrayList (java.util.ArrayList): 19 usages
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 18 usages
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 17 usages
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 15 usages
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 13 usages
MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc): 12 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 11 usages
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 9 usages
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 9 usages
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 9 usages
HashMap (java.util.HashMap): 8 usages
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 8 usages
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 8 usages
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 8 usages
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 8 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 8 usages
List (java.util.List): 7 usages