
Example 56 with ReduceSinkOperator

use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in project hive by apache.

the class HiveOpConverter method genJoin.

private static JoinOperator genJoin(RelNode join, ExprNodeDesc[][] joinExpressions, List<List<ExprNodeDesc>> filterExpressions, List<Operator<?>> children, String[] baseSrc, String tabAlias) throws SemanticException {
    // 1. Extract join type
    JoinCondDesc[] joinCondns;
    boolean semiJoin;
    boolean noOuterJoin;
    if (join instanceof HiveMultiJoin) {
        HiveMultiJoin hmj = (HiveMultiJoin) join;
        joinCondns = new JoinCondDesc[hmj.getJoinInputs().size()];
        for (int i = 0; i < hmj.getJoinInputs().size(); i++) {
            joinCondns[i] = new JoinCondDesc(new JoinCond(hmj.getJoinInputs().get(i).left, hmj.getJoinInputs().get(i).right, transformJoinType(hmj.getJoinTypes().get(i))));
        }
        semiJoin = false;
        noOuterJoin = !hmj.isOuterJoin();
    } else {
        joinCondns = new JoinCondDesc[1];
        semiJoin = join instanceof SemiJoin;
        JoinType joinType;
        if (semiJoin) {
            joinType = JoinType.LEFTSEMI;
        } else {
            joinType = extractJoinType((Join) join);
        }
        joinCondns[0] = new JoinCondDesc(new JoinCond(0, 1, joinType));
        noOuterJoin = joinType != JoinType.FULLOUTER && joinType != JoinType.LEFTOUTER && joinType != JoinType.RIGHTOUTER;
    }
    // 2. We create the join aux structures
    ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
    ArrayList<String> outputColumnNames = new ArrayList<String>(join.getRowType().getFieldNames());
    Operator<?>[] childOps = new Operator[children.size()];
    Map<String, Byte> reversedExprs = new HashMap<String, Byte>();
    Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<Byte, List<ExprNodeDesc>> filters = new HashMap<Byte, List<ExprNodeDesc>>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    HashMap<Integer, Set<String>> posToAliasMap = new HashMap<Integer, Set<String>>();
    int outputPos = 0;
    for (int pos = 0; pos < children.size(); pos++) {
        // 2.1. Backtracking from RS
        ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
        if (inputRS.getNumParent() != 1) {
            throw new SemanticException("RS should have single parent");
        }
        Operator<?> parent = inputRS.getParentOperators().get(0);
        ReduceSinkDesc rsDesc = inputRS.getConf();
        int[] index = inputRS.getValueIndex();
        Byte tag = (byte) rsDesc.getTag();
        // 2.1.1. If semijoin...
        if (semiJoin && pos != 0) {
            exprMap.put(tag, new ArrayList<ExprNodeDesc>());
            childOps[pos] = inputRS;
            continue;
        }
        posToAliasMap.put(pos, new HashSet<String>(inputRS.getSchema().getTableNames()));
        List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
        List<String> valColNames = rsDesc.getOutputValueColumnNames();
        Map<String, ExprNodeDesc> descriptors = buildBacktrackFromReduceSinkForJoin(outputPos, outputColumnNames, keyColNames, valColNames, index, parent, baseSrc[pos]);
        List<ColumnInfo> parentColumns = parent.getSchema().getSignature();
        for (int i = 0; i < index.length; i++) {
            ColumnInfo info = new ColumnInfo(parentColumns.get(i));
            info.setInternalName(outputColumnNames.get(outputPos));
            info.setTabAlias(tabAlias);
            outputColumns.add(info);
            reversedExprs.put(outputColumnNames.get(outputPos), tag);
            outputPos++;
        }
        exprMap.put(tag, new ArrayList<ExprNodeDesc>(descriptors.values()));
        colExprMap.putAll(descriptors);
        childOps[pos] = inputRS;
    }
    // 3. We populate the filters and filterMap structure needed in the join descriptor
    List<List<ExprNodeDesc>> filtersPerInput = Lists.newArrayList();
    int[][] filterMap = new int[children.size()][];
    for (int i = 0; i < children.size(); i++) {
        filtersPerInput.add(new ArrayList<ExprNodeDesc>());
    }
    // 3.1. We assign each filter expression to its corresponding input
    for (int i = 0; i < filterExpressions.size(); i++) {
        int leftPos = joinCondns[i].getLeft();
        int rightPos = joinCondns[i].getRight();
        for (ExprNodeDesc expr : filterExpressions.get(i)) {
            // We need to update the exprNode, as currently
            // they refer to columns in the output of the join;
            // they should refer to the columns output by the RS
            int inputPos = updateExprNode(expr, reversedExprs, colExprMap);
            if (inputPos == -1) {
                inputPos = leftPos;
            }
            filtersPerInput.get(inputPos).add(expr);
            if (joinCondns[i].getType() == JoinDesc.FULL_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.LEFT_OUTER_JOIN || joinCondns[i].getType() == JoinDesc.RIGHT_OUTER_JOIN) {
                if (inputPos == leftPos) {
                    updateFilterMap(filterMap, leftPos, rightPos);
                } else {
                    updateFilterMap(filterMap, rightPos, leftPos);
                }
            }
        }
    }
    for (int pos = 0; pos < children.size(); pos++) {
        ReduceSinkOperator inputRS = (ReduceSinkOperator) children.get(pos);
        ReduceSinkDesc rsDesc = inputRS.getConf();
        Byte tag = (byte) rsDesc.getTag();
        filters.put(tag, filtersPerInput.get(pos));
    }
    // 4. We create the join operator with its descriptor
    JoinDesc desc = new JoinDesc(exprMap, outputColumnNames, noOuterJoin, joinCondns, filters, joinExpressions, null);
    desc.setReversedExprs(reversedExprs);
    desc.setFilterMap(filterMap);
    JoinOperator joinOp = (JoinOperator) OperatorFactory.getAndMakeChild(childOps[0].getCompilationOpContext(), desc, new RowSchema(outputColumns), childOps);
    joinOp.setColumnExprMap(colExprMap);
    joinOp.setPosToAliasMap(posToAliasMap);
    joinOp.getConf().setBaseSrc(baseSrc);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + joinOp + " with row schema: [" + joinOp.getSchema() + "]");
    }
    return joinOp;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) JoinCond(org.apache.hadoop.hive.ql.parse.JoinCond) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SemiJoin(org.apache.calcite.rel.core.SemiJoin) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) JoinType(org.apache.hadoop.hive.ql.parse.JoinType) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) SemiJoin(org.apache.calcite.rel.core.SemiJoin) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) Join(org.apache.calcite.rel.core.Join) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc)

Example 57 with ReduceSinkOperator

use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in project hive by apache.

the class ColumnPrunerProcFactory method pruneJoinOperator.

private static void pruneJoinOperator(NodeProcessorCtx ctx, CommonJoinOperator op, JoinDesc conf, Map<String, ExprNodeDesc> columnExprMap, Map<Byte, List<Integer>> retainMap, boolean mapJoin) throws SemanticException {
    ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
    List<Operator<? extends OperatorDesc>> childOperators = op.getChildOperators();
    LOG.info("JOIN " + op.getIdentifier() + " oldExprs: " + conf.getExprs());
    if (cppCtx.genColLists(op) == null) {
        return;
    }
    List<FieldNode> neededColList = new ArrayList<>(cppCtx.genColLists(op));
    Map<Byte, List<FieldNode>> prunedColLists = new HashMap<>();
    for (byte tag : conf.getTagOrder()) {
        prunedColLists.put(tag, new ArrayList<FieldNode>());
    }
    // add the columns in join filters
    Set<Map.Entry<Byte, List<ExprNodeDesc>>> filters = conf.getFilters().entrySet();
    Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iter = filters.iterator();
    while (iter.hasNext()) {
        Map.Entry<Byte, List<ExprNodeDesc>> entry = iter.next();
        Byte tag = entry.getKey();
        for (ExprNodeDesc desc : entry.getValue()) {
            List<FieldNode> cols = prunedColLists.get(tag);
            cols = mergeFieldNodesWithDesc(cols, desc);
            prunedColLists.put(tag, cols);
        }
    }
    // add the columns in residual filters
    if (conf.getResidualFilterExprs() != null) {
        for (ExprNodeDesc desc : conf.getResidualFilterExprs()) {
            neededColList = mergeFieldNodesWithDesc(neededColList, desc);
        }
    }
    RowSchema joinRS = op.getSchema();
    ArrayList<String> outputCols = new ArrayList<String>();
    ArrayList<ColumnInfo> rs = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> newColExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < conf.getOutputColumnNames().size(); i++) {
        String internalName = conf.getOutputColumnNames().get(i);
        ExprNodeDesc desc = columnExprMap.get(internalName);
        Byte tag = conf.getReversedExprs().get(internalName);
        if (lookupColumn(neededColList, internalName) == null) {
            int index = conf.getExprs().get(tag).indexOf(desc);
            if (index < 0) {
                continue;
            }
            conf.getExprs().get(tag).remove(desc);
            if (retainMap != null) {
                retainMap.get(tag).remove(index);
            }
        } else {
            List<FieldNode> prunedRSList = prunedColLists.get(tag);
            if (prunedRSList == null) {
                prunedRSList = new ArrayList<>();
                prunedColLists.put(tag, prunedRSList);
            }
            prunedColLists.put(tag, mergeFieldNodesWithDesc(prunedRSList, desc));
            outputCols.add(internalName);
            newColExprMap.put(internalName, desc);
        }
    }
    if (mapJoin) {
        // regenerate the valueTableDesc
        List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
        for (int pos = 0; pos < op.getParentOperators().size(); pos++) {
            List<ExprNodeDesc> valueCols = conf.getExprs().get(Byte.valueOf((byte) pos));
            StringBuilder keyOrder = new StringBuilder();
            for (int i = 0; i < valueCols.size(); i++) {
                keyOrder.append("+");
            }
            TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(valueCols, "mapjoinvalue"));
            valueTableDescs.add(valueTableDesc);
        }
        ((MapJoinDesc) conf).setValueTblDescs(valueTableDescs);
        Set<Map.Entry<Byte, List<ExprNodeDesc>>> exprs = ((MapJoinDesc) conf).getKeys().entrySet();
        Iterator<Map.Entry<Byte, List<ExprNodeDesc>>> iters = exprs.iterator();
        while (iters.hasNext()) {
            Map.Entry<Byte, List<ExprNodeDesc>> entry = iters.next();
            List<ExprNodeDesc> lists = entry.getValue();
            for (int j = 0; j < lists.size(); j++) {
                ExprNodeDesc desc = lists.get(j);
                Byte tag = entry.getKey();
                List<FieldNode> cols = prunedColLists.get(tag);
                cols = mergeFieldNodesWithDesc(cols, desc);
                prunedColLists.put(tag, cols);
            }
        }
    }
    for (Operator<? extends OperatorDesc> child : childOperators) {
        if (child instanceof ReduceSinkOperator) {
            boolean[] flags = getPruneReduceSinkOpRetainFlags(toColumnNames(neededColList), (ReduceSinkOperator) child);
            pruneReduceSinkOperator(flags, (ReduceSinkOperator) child, cppCtx);
        }
    }
    for (int i = 0; i < outputCols.size(); i++) {
        String internalName = outputCols.get(i);
        ColumnInfo col = joinRS.getColumnInfo(internalName);
        rs.add(col);
    }
    LOG.info("JOIN " + op.getIdentifier() + " newExprs: " + conf.getExprs());
    op.setColumnExprMap(newColExprMap);
    conf.setOutputColumnNames(outputCols);
    op.getSchema().setSignature(rs);
    cppCtx.getJoinPrunedColLists().put(op, prunedColLists);
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) PTFOperator(org.apache.hadoop.hive.ql.exec.PTFOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) LateralViewForwardOperator(org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) UDTFOperator(org.apache.hadoop.hive.ql.exec.UDTFOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) List(java.util.List) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) Map(java.util.Map) HashMap(java.util.HashMap)

Example 58 with ReduceSinkOperator

use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in project hive by apache.

the class ConvertJoinMapJoin method convertJoinMapJoin.

/*
   * Once we have decided on the map join, the tree would transform from
   *
   *        |                   |
   *       Join               MapJoin
   *       / \                /   \
   *     RS   RS   --->     RS    TS (big table)
   *    /      \           /
   *   TS       TS        TS (small table)
   *
   * for tez.
   */
public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, boolean removeReduceSink) throws SemanticException {
    // Bail out if any parent is a MuxOperator, since it masks the emit keys
    // of the constituent reduce sinks.
    for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
        if (parentOp instanceof MuxOperator) {
            return null;
        }
    }
    // At this point we can safely convert the join to a map join.
    MapJoinOperator mapJoinOp = MapJoinProcessor.convertJoinOpMapJoinOp(context.conf, joinOp, joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(), joinOp.getConf().getMapAliases(), bigTablePosition, true, removeReduceSink);
    mapJoinOp.getConf().setHybridHashJoin(HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN));
    List<ExprNodeDesc> joinExprs = mapJoinOp.getConf().getKeys().values().iterator().next();
    if (joinExprs.size() == 0) {
        // In case of cross join, we disable hybrid grace hash join
        mapJoinOp.getConf().setHybridHashJoin(false);
    }
    Operator<? extends OperatorDesc> parentBigTableOp = mapJoinOp.getParentOperators().get(bigTablePosition);
    if (parentBigTableOp instanceof ReduceSinkOperator) {
        Operator<?> parentSelectOpOfBigTableOp = parentBigTableOp.getParentOperators().get(0);
        if (removeReduceSink) {
            for (Operator<?> p : parentBigTableOp.getParentOperators()) {
                // we might have generated a dynamic partition operator chain. Since
                // we're removing the reduce sink we need to remove that too.
                Set<Operator<?>> dynamicPartitionOperators = new HashSet<Operator<?>>();
                Map<Operator<?>, AppMasterEventOperator> opEventPairs = new HashMap<>();
                for (Operator<?> c : p.getChildOperators()) {
                    AppMasterEventOperator event = findDynamicPartitionBroadcast(c);
                    if (event != null) {
                        dynamicPartitionOperators.add(c);
                        opEventPairs.put(c, event);
                    }
                }
                for (Operator<?> c : dynamicPartitionOperators) {
                    if (context.pruningOpsRemovedByPriorOpt.isEmpty() || !context.pruningOpsRemovedByPriorOpt.contains(opEventPairs.get(c))) {
                        p.removeChild(c);
                        // at this point we've found the fork in the op pipeline that has the pruning as a child plan.
                        LOG.info("Disabling dynamic pruning for: " + ((DynamicPruningEventDesc) opEventPairs.get(c).getConf()).getTableScan().getName() + ". Need to be removed together with reduce sink");
                    }
                }
                for (Operator<?> op : dynamicPartitionOperators) {
                    context.pruningOpsRemovedByPriorOpt.add(opEventPairs.get(op));
                }
            }
            mapJoinOp.getParentOperators().remove(bigTablePosition);
            if (!(mapJoinOp.getParentOperators().contains(parentBigTableOp.getParentOperators().get(0)))) {
                mapJoinOp.getParentOperators().add(bigTablePosition, parentBigTableOp.getParentOperators().get(0));
            }
            parentBigTableOp.getParentOperators().get(0).removeChild(parentBigTableOp);
        }
        for (Operator<? extends OperatorDesc> op : mapJoinOp.getParentOperators()) {
            if (!(op.getChildOperators().contains(mapJoinOp))) {
                op.getChildOperators().add(mapJoinOp);
            }
            op.getChildOperators().remove(joinOp);
        }
        // Remove any semijoin branches that could create a cycle with the map
        // join, which takes place in a separate task.
        if (context.parseContext.getRsToSemiJoinBranchInfo().size() > 0 && removeReduceSink) {
            removeCycleCreatingSemiJoinOps(mapJoinOp, parentSelectOpOfBigTableOp, context.parseContext);
        }
    }
    return mapJoinOp;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) MuxOperator(org.apache.hadoop.hive.ql.exec.MuxOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) HashMap(java.util.HashMap) MuxOperator(org.apache.hadoop.hive.ql.exec.MuxOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) DynamicPruningEventDesc(org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) HashSet(java.util.HashSet)

Example 59 with ReduceSinkOperator

use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in project hive by apache.

the class ConvertJoinMapJoin method convertJoinBucketMapJoin.

private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
    if (!checkConvertJoinBucketMapJoin(joinOp, bigTablePosition, tezBucketJoinProcCtx)) {
        LOG.info("Check conversion to bucket map join failed.");
        return false;
    }
    // In case the join has extra keys other than bucketed columns, partition keys need to be updated
    // on small table(s).
    ReduceSinkOperator bigTableRS = (ReduceSinkOperator) joinOp.getParentOperators().get(bigTablePosition);
    OpTraits opTraits = bigTableRS.getOpTraits();
    List<List<String>> listBucketCols = opTraits.getBucketColNames();
    ArrayList<ExprNodeDesc> bigTablePartitionCols = bigTableRS.getConf().getPartitionCols();
    boolean updatePartitionCols = false;
    List<Integer> positions = new ArrayList<>();
    if (listBucketCols.get(0).size() != bigTablePartitionCols.size()) {
        updatePartitionCols = true;
        // Prepare updated partition columns for small table(s).
        // Get the positions of bucketed columns
        int i = 0;
        Map<String, ExprNodeDesc> colExprMap = bigTableRS.getColumnExprMap();
        for (ExprNodeDesc bigTableExpr : bigTablePartitionCols) {
            // It is guaranteed there is only 1 list within listBucketCols.
            for (String colName : listBucketCols.get(0)) {
                if (colExprMap.get(colName).isSame(bigTableExpr)) {
                    positions.add(i++);
                }
            }
        }
    }
    MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePosition, true);
    if (mapJoinOp == null) {
        LOG.debug("Conversion to bucket map join failed.");
        return false;
    }
    MapJoinDesc joinDesc = mapJoinOp.getConf();
    joinDesc.setBucketMapJoin(true);
    // we can set the traits for this join operator
    opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks());
    mapJoinOp.setOpTraits(opTraits);
    mapJoinOp.setStatistics(joinOp.getStatistics());
    setNumberOfBucketsOnChildren(mapJoinOp);
    // Once the conversion is done, we can set the partitioner to bucket cols on the small table
    Map<String, Integer> bigTableBucketNumMapping = new HashMap<String, Integer>();
    bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets());
    joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping);
    // Update the partition columns in small table to ensure correct routing of hash tables.
    if (updatePartitionCols) {
        // Keep only the bucketed column positions in the partition columns
        // on the small table side.
        for (Operator<?> op : mapJoinOp.getParentOperators()) {
            if (!(op instanceof ReduceSinkOperator)) {
                continue;
            }
            ReduceSinkOperator rsOp = (ReduceSinkOperator) op;
            ArrayList<ExprNodeDesc> newPartitionCols = new ArrayList<>();
            ArrayList<ExprNodeDesc> partitionCols = rsOp.getConf().getPartitionCols();
            for (Integer position : positions) {
                newPartitionCols.add(partitionCols.get(position));
            }
            rsOp.getConf().setPartitionCols(newPartitionCols);
        }
    }
    // Update the memory monitor info for LLAP.
    MemoryMonitorInfo memoryMonitorInfo = joinDesc.getMemoryMonitorInfo();
    if (memoryMonitorInfo.isLlap()) {
        memoryMonitorInfo.setHashTableInflationFactor(1);
        memoryMonitorInfo.setMemoryOverSubscriptionFactor(0);
    }
    return true;
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) OpTraits(org.apache.hadoop.hive.ql.plan.OpTraits) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MemoryMonitorInfo(org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) List(java.util.List) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)

Example 60 with ReduceSinkOperator

use of org.apache.hadoop.hive.ql.exec.ReduceSinkOperator in project hive by apache.

the class ConvertJoinMapJoin method checkShuffleSizeForLargeTable.

/* Returns true if it passes the test, false otherwise. */
private boolean checkShuffleSizeForLargeTable(JoinOperator joinOp, int position, OptimizeTezProcContext context) {
    long max = HiveConf.getLongVar(context.parseContext.getConf(), HiveConf.ConfVars.HIVECONVERTJOINMAXSHUFFLESIZE);
    if (max < 1) {
        // Max is disabled, we can safely return false
        return false;
    }
    // Evaluate
    ReduceSinkOperator rsOp = (ReduceSinkOperator) joinOp.getParentOperators().get(position);
    Statistics inputStats = rsOp.getStatistics();
    long inputSize = inputStats.getDataSize();
    LOG.debug("Estimated size for input {}: {}; Max size for DPHJ conversion: {}", position, inputSize, max);
    if (inputSize > max) {
        LOG.debug("Size of input is greater than the max; " + "we do not convert to DPHJ");
        return false;
    }
    return true;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) Statistics(org.apache.hadoop.hive.ql.plan.Statistics) ColStatistics(org.apache.hadoop.hive.ql.plan.ColStatistics)

Aggregations

ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)86 Operator (org.apache.hadoop.hive.ql.exec.Operator)50 ArrayList (java.util.ArrayList)48 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)45 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)35 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)31 HashMap (java.util.HashMap)29 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)28 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)27 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)26 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)26 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)25 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)24 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)23 List (java.util.List)19 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)19 LinkedHashMap (java.util.LinkedHashMap)18 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)18 ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)18 AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator)15