Search in sources:

Example 1 with MapJoinOperator

use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

the class BucketMapjoinProc method process.

/**
 * Visits a map join and, when possible, rewrites it as a bucketed map join.
 *
 * @param nd the visited node; cast to {@code MapJoinOperator}
 * @param stack the walker's operator stack (not read by this method)
 * @param procCtx the processing context; cast to {@code BucketJoinProcCtx}
 * @param nodeOutputs outputs of earlier nodes (not read by this method)
 * @return always {@code null}
 * @throws SemanticException if the join cannot be converted while
 *         {@code HIVEENFORCEBUCKETMAPJOIN} is enabled
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    BucketJoinProcCtx bucketCtx = (BucketJoinProcCtx) procCtx;
    MapJoinOperator joinOp = (MapJoinOperator) nd;

    // Decide up front whether this map join qualifies for bucketing.
    boolean convertible = canConvertMapJoinToBucketMapJoin(joinOp, bucketCtx);
    HiveConf hiveConf = bucketCtx.getConf();

    if (convertible) {
        // Rewrite the map join as a bucketized map join.
        convertMapJoinToBucketMapJoin(joinOp, bucketCtx);
        return null;
    }

    // Conversion is impossible; that is only an error when enforcement is on.
    if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVEENFORCEBUCKETMAPJOIN)) {
        throw new SemanticException(ErrorMsg.BUCKET_MAPJOIN_NOT_POSSIBLE.getMsg());
    }
    return null;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) HiveConf(org.apache.hadoop.hive.conf.HiveConf) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 2 with MapJoinOperator

use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

the class SparkSMBJoinHintOptimizer method process.

/**
 * Visits a map join and, when possible, rewrites it as a sort-merge-bucket join.
 *
 * @param nd the visited node; cast to {@code MapJoinOperator}
 * @param stack the walker's operator stack, forwarded to the eligibility check
 * @param procCtx the processing context; cast to {@code SortBucketJoinProcCtx}
 * @param nodeOutputs outputs of earlier nodes, forwarded to the eligibility check
 * @return always {@code null}
 * @throws SemanticException if the join cannot be converted while
 *         {@code HIVEENFORCESORTMERGEBUCKETMAPJOIN} is enabled
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    MapJoinOperator joinOp = (MapJoinOperator) nd;
    SortBucketJoinProcCtx smbCtx = (SortBucketJoinProcCtx) procCtx;

    boolean convertible = canConvertBucketMapJoinToSMBJoin(joinOp, stack, smbCtx, nodeOutputs);

    if (convertible) {
        removeSmallTableReduceSink(joinOp);
        convertBucketMapJoinToSMBJoin(joinOp, smbCtx);
        return null;
    }

    // A sort-merge bucketed map join cannot be performed; fail only when it is enforced.
    if (pGraphContext.getConf().getBoolVar(HiveConf.ConfVars.HIVEENFORCESORTMERGEBUCKETMAPJOIN)) {
        throw new SemanticException(ErrorMsg.SORTMERGE_MAPJOIN_FAILED.getMsg());
    }
    return null;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SortBucketJoinProcCtx(org.apache.hadoop.hive.ql.optimizer.SortBucketJoinProcCtx) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 3 with MapJoinOperator

use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

the class MapJoinProcessor method generateMapJoinOperator.

/**
 * Converts a join operator into a map join and places a select operator
 * directly beneath it.
 *
 * @param pctx the parse context supplying the configuration
 * @param op the join operator to convert
 * @param mapJoinPos position value forwarded to {@code convertMapJoin}
 * @return the newly created map join operator
 * @throws SemanticException declared by the signature; raised by the helpers it calls
 */
public MapJoinOperator generateMapJoinOperator(ParseContext pctx, JoinOperator op, int mapJoinPos) throws SemanticException {
    HiveConf hiveConf = pctx.getConf();

    // The outer-join check is skipped only when both the sort-merge-bucket
    // and the bucket map join optimizations are switched on.
    boolean noCheckOuterJoin = false;
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN)) {
        noCheckOuterJoin = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN);
    }

    MapJoinOperator converted = convertMapJoin(
        pctx.getConf(),
        op,
        op.getConf().isLeftInputJoin(),
        op.getConf().getBaseSrc(),
        op.getConf().getMapAliases(),
        mapJoinPos,
        noCheckOuterJoin,
        true);

    // Add a dummy select that passes every column through.
    genSelectPlan(pctx, converted);
    return converted;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) HiveConf(org.apache.hadoop.hive.conf.HiveConf)

Example 4 with MapJoinOperator

use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

the class MapJoinProcessor method genSelectPlan.

/**
 * Inserts a pass-through select operator between the given map join and its
 * current children.
 *
 * @param pctx the parse context (not read by this method)
 * @param input the map join operator that receives the select as its child
 * @throws SemanticException declared by the signature
 */
protected void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException {
    // Remember the current children, then clear the child list before the
    // select is attached; the saved children are re-linked at the end.
    List<Operator<? extends OperatorDesc>> formerChildren = input.getChildOperators();
    input.setChildOperators(null);

    // Create a dummy select - this select is needed by the walker to split
    // the mapJoin later on.
    RowSchema joinSchema = input.getSchema();
    ArrayList<ExprNodeDesc> selectExprs = new ArrayList<ExprNodeDesc>();
    ArrayList<String> selectNames = new ArrayList<String>();
    List<String> joinOutputCols = input.getConf().getOutputColumnNames();
    ArrayList<ColumnInfo> selectSchemaCols = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> exprByName = new HashMap<String, ExprNodeDesc>();

    for (String internalName : joinOutputCols) {
        ColumnInfo source = joinSchema.getColumnInfo(internalName);

        // Column reference that reads the join's output column unchanged.
        ExprNodeDesc columnRef = new ExprNodeColumnDesc(
            source.getType(),
            source.getInternalName(),
            source.getTabAlias(),
            source.getIsVirtualCol());
        selectExprs.add(columnRef);
        selectNames.add(internalName);

        // Matching schema entry for the select's own output row.
        ColumnInfo projected = new ColumnInfo(
            internalName,
            source.getType(),
            source.getTabAlias(),
            source.getIsVirtualCol(),
            source.isHiddenVirtualCol());
        projected.setAlias(source.getAlias());
        selectSchemaCols.add(projected);

        exprByName.put(internalName, columnRef);
    }

    SelectDesc selectDesc = new SelectDesc(selectExprs, selectNames, false);
    SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(selectDesc, new RowSchema(selectSchemaCols), input);
    selectOp.setColumnExprMap(exprByName);

    // Splice the select in: it adopts the former children, and each of them
    // now points at the select instead of the map join.
    selectOp.setChildOperators(formerChildren);
    for (Operator<? extends OperatorDesc> child : formerChildren) {
        child.replaceParent(input, selectOp);
    }
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ScriptOperator(org.apache.hadoop.hive.ql.exec.ScriptOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 5 with MapJoinOperator

use of org.apache.hadoop.hive.ql.exec.MapJoinOperator in project hive by apache.

the class ReduceSinkMapJoinProc method process.

/* (non-Javadoc)
   * This processor addresses the RS-MJ case that occurs in tez on the small/hash
   * table side of things. The work that the RS will be a part of must be connected
   * to the MJ work via a broadcast edge.
   * We should not walk down the tree when we encounter this pattern because:
   * the type of work (map work or reduce work) needs to be determined
   * on the basis of the big table side because it may be a mapwork (no need for shuffle)
   * or reduce work.
   */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext, Object... nodeOutputs) throws SemanticException {
    GenTezProcContext tezCtx = (GenTezProcContext) procContext;
    MapJoinOperator joinOp = (MapJoinOperator) nd;

    // Snapshot the original parent list once, before it starts being modified.
    if (!tezCtx.mapJoinParentMap.containsKey(joinOp)) {
        tezCtx.mapJoinParentMap.put(joinOp, new ArrayList<Operator<?>>(joinOp.getParentOperators()));
    }

    // We arrived from the big-table side unless the node just below the top
    // of the stack is a ReduceSinkOperator.
    ReduceSinkOperator parentRS = null;
    boolean cameFromBigTable = true;
    int belowTop = stack.size() - 2;
    if (belowTop >= 0) {
        Node previous = stack.get(belowTop);
        if (previous instanceof ReduceSinkOperator) {
            parentRS = (ReduceSinkOperator) previous;
            // For dynamic partitioned hash join, the big table will also be coming
            // from a ReduceSinkOperator, so compare the parent position as well.
            // TODO: use indexOf(), or parentRS.getTag()?
            int parentPos = joinOp.getParentOperators().indexOf(parentRS);
            cameFromBigTable = (parentPos == joinOp.getConf().getPosBigTable());
        }
    }

    // On the first visit of a dynamic partition hash join, record every
    // small-table parent as an unprocessed reduce sink.
    if (joinOp.getConf().isDynamicPartitionHashJoin()
            && !tezCtx.mapJoinToUnprocessedSmallTableReduceSinks.containsKey(joinOp)) {
        Set<ReduceSinkOperator> pendingSmallTables = Sets.newIdentityHashSet();
        int bigTablePos = joinOp.getConf().getPosBigTable();
        int pos = 0;
        for (Operator<?> parent : joinOp.getParentOperators()) {
            if (pos != bigTablePos) {
                pendingSmallTables.add((ReduceSinkOperator) parent);
            }
            pos++;
        }
        tezCtx.mapJoinToUnprocessedSmallTableReduceSinks.put(joinOp, pendingSmallTables);
    }

    if (cameFromBigTable) {
        tezCtx.currentMapJoinOperators.add(joinOp);
        return null;
    }

    // Small-table side: reset the walker state and connect the reduce sink
    // to the hash join.
    tezCtx.preceedingWork = null;
    tezCtx.currentRootOperator = null;
    return processReduceSinkToHashJoin(parentRS, joinOp, tezCtx);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) HashTableDummyOperator(org.apache.hadoop.hive.ql.exec.HashTableDummyOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ArrayList(java.util.ArrayList) GenTezProcContext(org.apache.hadoop.hive.ql.parse.GenTezProcContext)

Aggregations

MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 41; Operator (org.apache.hadoop.hive.ql.exec.Operator): 22; ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 22; ArrayList (java.util.ArrayList): 19; JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 18; SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 17; OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 15; TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 13; MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc): 12; ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 11; FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 9; GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 9; UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 9; HashMap (java.util.HashMap): 8; AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 8; RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 8; SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 8; SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 8; TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 8; List (java.util.List): 7