Search in sources :

Example 21 with MapJoinOperator

use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

the class AbstractSMBJoinProc method convertJoinToBucketMapJoin.

// Convert the join operator to a bucket map-join join operator
protected MapJoinOperator convertJoinToBucketMapJoin(JoinOperator joinOp, SortBucketJoinProcCtx joinContext) throws SemanticException {
    MapJoinOperator mapJoinOp = new MapJoinProcessor().convertMapJoin(pGraphContext.getConf(), joinOp, joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(), joinOp.getConf().getMapAliases(), joinContext.getBigTablePosition(), false, false);
    // Remove the join operator from the query join context
    // Data structures coming from QBJoinTree
    mapJoinOp.getConf().setQBJoinTreeProps(joinOp.getConf());
    // 
    pGraphContext.getMapJoinOps().add(mapJoinOp);
    pGraphContext.getJoinOps().remove(joinOp);
    convertMapJoinToBucketMapJoin(mapJoinOp, joinContext);
    return mapJoinOp;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)

Example 22 with MapJoinOperator

use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

the class AbstractSMBJoinProc method convertBucketMapJoinToSMBJoin.

// Convert the bucket map-join operator to a sort-merge map join operator
protected SMBMapJoinOperator convertBucketMapJoinToSMBJoin(MapJoinOperator mapJoinOp, SortBucketJoinProcCtx smbJoinContext) {
    String[] srcs = smbJoinContext.getSrcs();
    SMBMapJoinOperator smbJop = new SMBMapJoinOperator(mapJoinOp);
    SMBJoinDesc smbJoinDesc = new SMBJoinDesc(mapJoinOp.getConf());
    smbJop.setConf(smbJoinDesc);
    HashMap<Byte, String> tagToAlias = new HashMap<Byte, String>();
    for (int i = 0; i < srcs.length; i++) {
        tagToAlias.put((byte) i, srcs[i]);
    }
    smbJoinDesc.setTagToAlias(tagToAlias);
    int indexInListMapJoinNoReducer = this.pGraphContext.getListMapJoinOpsNoReducer().indexOf(mapJoinOp);
    if (indexInListMapJoinNoReducer >= 0) {
        this.pGraphContext.getListMapJoinOpsNoReducer().remove(indexInListMapJoinNoReducer);
        this.pGraphContext.getListMapJoinOpsNoReducer().add(indexInListMapJoinNoReducer, smbJop);
    }
    Map<String, DummyStoreOperator> aliasToSink = new HashMap<String, DummyStoreOperator>();
    // For all parents (other than the big table), insert a dummy store operator
    /* Consider a query like:
     *
     * select * from
     *   (subq1 --> has a filter)
     *   join
     *   (subq2 --> has a filter)
     * on some key
     *
     * Let us assume that subq1 is the small table (either specified by the user or inferred
     * automatically). The following operator tree will be created:
     *
     * TableScan (subq1) --> Select --> Filter --> DummyStore
     *                                                         \
     *                                                          \     SMBJoin
     *                                                          /
     *                                                         /
     * TableScan (subq2) --> Select --> Filter
     */
    List<Operator<? extends OperatorDesc>> parentOperators = mapJoinOp.getParentOperators();
    for (int i = 0; i < parentOperators.size(); i++) {
        Operator<? extends OperatorDesc> par = parentOperators.get(i);
        int index = par.getChildOperators().indexOf(mapJoinOp);
        par.getChildOperators().remove(index);
        if (i == smbJoinDesc.getPosBigTable()) {
            par.getChildOperators().add(index, smbJop);
        } else {
            DummyStoreOperator dummyStoreOp = new DummyStoreOperator(par.getCompilationOpContext());
            par.getChildOperators().add(index, dummyStoreOp);
            List<Operator<? extends OperatorDesc>> childrenOps = new ArrayList<Operator<? extends OperatorDesc>>();
            childrenOps.add(smbJop);
            dummyStoreOp.setChildOperators(childrenOps);
            List<Operator<? extends OperatorDesc>> parentOps = new ArrayList<Operator<? extends OperatorDesc>>();
            parentOps.add(par);
            dummyStoreOp.setParentOperators(parentOps);
            aliasToSink.put(srcs[i], dummyStoreOp);
            smbJop.getParentOperators().remove(i);
            smbJop.getParentOperators().add(i, dummyStoreOp);
        }
    }
    smbJoinDesc.setAliasToSink(aliasToSink);
    List<Operator<? extends OperatorDesc>> childOps = mapJoinOp.getChildOperators();
    for (int i = 0; i < childOps.size(); i++) {
        Operator<? extends OperatorDesc> child = childOps.get(i);
        int index = child.getParentOperators().indexOf(mapJoinOp);
        child.getParentOperators().remove(index);
        child.getParentOperators().add(index, smbJop);
    }
    // Data structures coming from QBJoinTree
    smbJop.getConf().setQBJoinTreeProps(mapJoinOp.getConf());
    // 
    pGraphContext.getSmbMapJoinOps().add(smbJop);
    pGraphContext.getMapJoinOps().remove(mapJoinOp);
    return smbJop;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) HashMap(java.util.HashMap) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) ArrayList(java.util.ArrayList) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 23 with MapJoinOperator

use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

the class ConvertJoinMapJoin method process.

@Override
public /*
   * (non-Javadoc) we should ideally not modify the tree we traverse. However,
   * since we need to walk the tree at any time when we modify the operator, we
   * might as well do it here.
   */
Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    OptimizeTezProcContext context = (OptimizeTezProcContext) procCtx;
    JoinOperator joinOp = (JoinOperator) nd;
    long maxSize = context.conf.getLongVar(HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
    // adjust noconditional task size threshold for LLAP
    LlapClusterStateForCompile llapInfo = null;
    if ("llap".equalsIgnoreCase(context.conf.getVar(ConfVars.HIVE_EXECUTION_MODE))) {
        llapInfo = LlapClusterStateForCompile.getClusterInfo(context.conf);
        llapInfo.initClusterInfo();
    }
    MemoryMonitorInfo memoryMonitorInfo = getMemoryMonitorInfo(maxSize, context.conf, llapInfo);
    joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo);
    // not use map join in case of cross product
    boolean cartesianProductEdgeEnabled = HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED);
    if (cartesianProductEdgeEnabled && !hasOuterJoin(joinOp) && isCrossProduct(joinOp)) {
        fallbackToMergeJoin(joinOp, context);
        return null;
    }
    TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf);
    boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) & !context.parseContext.getDisableMapJoin();
    if (!hiveConvertJoin) {
        // we are just converting to a common merge join operator. The shuffle
        // join in map-reduce case.
        Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx, maxSize);
        if (retval == null) {
            return retval;
        } else {
            fallbackToReduceSideJoin(joinOp, context, maxSize);
            return null;
        }
    }
    // if we have traits, and table info is present in the traits, we know the
    // exact number of buckets. Else choose the largest number of estimated
    // reducers from the parent operators.
    int numBuckets = -1;
    if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
        numBuckets = estimateNumBuckets(joinOp, true);
    } else {
        numBuckets = 1;
    }
    LOG.info("Estimated number of buckets " + numBuckets);
    int mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, numBuckets, false, maxSize, true);
    if (mapJoinConversionPos < 0) {
        Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx, maxSize);
        if (retval == null) {
            return retval;
        } else {
            // only case is full outer join with SMB enabled which is not possible. Convert to regular
            // join.
            fallbackToReduceSideJoin(joinOp, context, maxSize);
            return null;
        }
    }
    if (context.conf.getBoolVar(HiveConf.ConfVars.HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ)) {
        // Check if we are in LLAP, if so it needs to be determined if we should use BMJ or DPHJ
        if (llapInfo != null) {
            if (selectJoinForLlap(context, joinOp, tezBucketJoinProcCtx, llapInfo, mapJoinConversionPos, numBuckets)) {
                return null;
            }
        } else if (numBuckets > 1 && convertJoinBucketMapJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx)) {
            return null;
        }
    }
    // check if we can convert to map join no bucket scaling.
    LOG.info("Convert to non-bucketed map join");
    if (numBuckets != 1) {
        mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1, false, maxSize, true);
    }
    if (mapJoinConversionPos < 0) {
        // we are just converting to a common merge join operator. The shuffle
        // join in map-reduce case.
        fallbackToReduceSideJoin(joinOp, context, maxSize);
        return null;
    }
    MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos, true);
    // map join operator by default has no bucket cols and num of reduce sinks
    // reduced by 1
    mapJoinOp.setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks()));
    mapJoinOp.setStatistics(joinOp.getStatistics());
    // propagate this change till the next RS
    for (Operator<? extends OperatorDesc> childOp : mapJoinOp.getChildOperators()) {
        setAllChildrenTraits(childOp, mapJoinOp.getOpTraits());
    }
    return null;
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) MemoryMonitorInfo(org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo) LlapClusterStateForCompile(org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile) OpTraits(org.apache.hadoop.hive.ql.plan.OpTraits) OptimizeTezProcContext(org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext)

Example 24 with MapJoinOperator

use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

the class SparkSortMergeJoinOptimizer method convertJoinToSMBJoinAndReturn.

protected SMBMapJoinOperator convertJoinToSMBJoinAndReturn(JoinOperator joinOp, SortBucketJoinProcCtx smbJoinContext) throws SemanticException {
    MapJoinOperator mapJoinOp = convertJoinToBucketMapJoin(joinOp, smbJoinContext);
    SMBMapJoinOperator smbMapJoinOp = convertBucketMapJoinToSMBJoin(mapJoinOp, smbJoinContext);
    smbMapJoinOp.setConvertedAutomaticallySMBJoin(true);
    return smbMapJoinOp;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)

Example 25 with MapJoinOperator

use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

the class AbstractSMBJoinProc method convertJoinToSMBJoin.

// Convert the join operator to a sort-merge join operator
protected void convertJoinToSMBJoin(JoinOperator joinOp, SortBucketJoinProcCtx smbJoinContext) throws SemanticException {
    MapJoinOperator mapJoinOp = convertJoinToBucketMapJoin(joinOp, smbJoinContext);
    SMBMapJoinOperator smbMapJoinOp = convertBucketMapJoinToSMBJoin(mapJoinOp, smbJoinContext);
    smbMapJoinOp.setConvertedAutomaticallySMBJoin(true);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)

Aggregations

MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)41 Operator (org.apache.hadoop.hive.ql.exec.Operator)22 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)22 ArrayList (java.util.ArrayList)19 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)18 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)17 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)15 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)13 MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc)12 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)11 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)9 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)9 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)9 HashMap (java.util.HashMap)8 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)8 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)8 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)8 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)8 TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc)8 List (java.util.List)7