Search in sources :

Example 1 with CommonJoinOperator

use of org.apache.hadoop.hive.ql.exec.CommonJoinOperator in project hive by apache.

the class ConvertJoinMapJoin method computeCumulativeCardinality.

/**
 * Estimates the cumulative cardinality of the operator tree that ends at {@code op}.
 * This is akin to the CBO cumulative cardinality model.
 *
 * <p>The parents of {@code op} are evaluated recursively first. When {@code op} is a
 * {@code CommonJoinOperator} the largest parent estimate is kept; for every other
 * operator the parent estimates are summed. The row count from the operator's own
 * statistics is then added on top.
 *
 * <p>All additions saturate at the bounds of {@code long} instead of wrapping around,
 * so very large row-count estimates can no longer turn into negative cardinalities.
 *
 * @param op operator whose upstream tree is measured
 * @return the cumulative estimate, or {@code null} when {@code op} or any operator
 *         above it has no statistics attached
 */
private static Long computeCumulativeCardinality(Operator<? extends OperatorDesc> op) {
    final boolean isJoin = op instanceof CommonJoinOperator;
    long cumulativeCardinality = 0L;
    for (Operator<? extends OperatorDesc> inputOp : op.getParentOperators()) {
        Long inputCardinality = computeCumulativeCardinality(inputOp);
        if (inputCardinality == null) {
            // Statistics are missing somewhere upstream; no estimate is possible.
            return null;
        }
        if (isJoin) {
            // Choose max
            cumulativeCardinality = Math.max(cumulativeCardinality, inputCardinality);
        } else {
            // Choose cumulative
            cumulativeCardinality = saturatingAdd(cumulativeCardinality, inputCardinality);
        }
    }
    Statistics currInputStat = op.getStatistics();
    if (currInputStat == null) {
        LOG.warn("Couldn't get statistics from: " + op);
        return null;
    }
    return saturatingAdd(cumulativeCardinality, currInputStat.getNumRows());
}

// Adds two row counts, clamping to the long range instead of silently wrapping on overflow.
private static long saturatingAdd(long a, long b) {
    long sum = a + b;
    // Overflow occurred iff both operands share a sign that the sum does not have.
    if (((a ^ sum) & (b ^ sum)) < 0) {
        return a < 0 ? Long.MIN_VALUE : Long.MAX_VALUE;
    }
    return sum;
}
Also used : CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) Statistics(org.apache.hadoop.hive.ql.plan.Statistics) ColStatistics(org.apache.hadoop.hive.ql.plan.ColStatistics)

Example 2 with CommonJoinOperator

use of org.apache.hadoop.hive.ql.exec.CommonJoinOperator in project hive by apache.

the class ConvertJoinMapJoin method convertJoinSMBJoin.

/**
 * Swaps {@code joinOp} out of the operator graph in favour of a newly created
 * {@code CommonMergeJoinOperator}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Build the {@code MapJoinDesc}: through {@code MapJoinProcessor.getMapJoinDesc}
 *       when {@code adjustParentsChildren} is set, otherwise by copying the relevant
 *       fields of the original {@code JoinDesc}.</li>
 *   <li>Create the merge join operator, giving it the join's schema and statistics
 *       and the join's op traits with the bucket count replaced by {@code numBuckets}.</li>
 *   <li>Substitute the merge join for {@code joinOp} in every parent's child list and
 *       every child's parent list, keeping the original positions, and copy the
 *       join's parent/child lists onto the merge join.</li>
 *   <li>Only when {@code adjustParentsChildren} is set: bypass the merge join's
 *       immediate parents (the original comment describes them as reduce sinks) so
 *       that the grandparents feed it directly, then place a
 *       {@code TezDummyStoreOperator} between the merge join and every parent except
 *       the one at index {@code mapJoinConversionPos}.</li>
 *   <li>Record the resulting parent list via {@code cloneOriginalParentsList}.</li>
 * </ol>
 *
 * @param joinOp the join operator being replaced
 * @param context supplies the configuration handed to {@code MapJoinProcessor}
 * @param mapJoinConversionPos parent position that does not receive a dummy store
 * @param numBuckets bucket count for the merge join descriptor and op traits
 * @param adjustParentsChildren whether the immediate parents are bypassed and dummy
 *        store operators are inserted
 * @throws SemanticException declared by the signature; presumably raised by the
 *         {@code MapJoinProcessor} helpers
 */
private void convertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcContext context, int mapJoinConversionPos, int numBuckets, boolean adjustParentsChildren) throws SemanticException {
    final MapJoinDesc mapJoinDesc;
    if (!adjustParentsChildren) {
        // retain the original join desc in the map join.
        JoinDesc originalDesc = joinOp.getConf();
        mapJoinDesc = new MapJoinDesc(
                MapJoinProcessor.getKeys(originalDesc.isLeftInputJoin(), originalDesc.getBaseSrc(), joinOp).getSecond(),
                null,
                originalDesc.getExprs(),
                null,
                null,
                originalDesc.getOutputColumnNames(),
                mapJoinConversionPos,
                originalDesc.getConds(),
                originalDesc.getFilters(),
                originalDesc.getNoOuterJoin(),
                null);
        mapJoinDesc.setNullSafes(originalDesc.getNullSafes());
        mapJoinDesc.setFilterMap(originalDesc.getFilterMap());
        mapJoinDesc.setResidualFilterExprs(originalDesc.getResidualFilterExprs());
        mapJoinDesc.resetOrder();
    } else {
        JoinDesc originalDesc = joinOp.getConf();
        mapJoinDesc = MapJoinProcessor.getMapJoinDesc(context.conf, joinOp, originalDesc.isLeftInputJoin(), originalDesc.getBaseSrc(), originalDesc.getMapAliases(), mapJoinConversionPos, true);
    }

    // Build the replacement operator and carry over traits and statistics.
    CommonMergeJoinDesc mergeJoinDesc = new CommonMergeJoinDesc(numBuckets, mapJoinConversionPos, mapJoinDesc);
    CommonMergeJoinOperator mergeJoinOp = (CommonMergeJoinOperator) OperatorFactory.get(joinOp.getCompilationOpContext(), mergeJoinDesc, joinOp.getSchema());
    OpTraits sourceTraits = joinOp.getOpTraits();
    mergeJoinOp.setOpTraits(new OpTraits(sourceTraits.getBucketColNames(), numBuckets, sourceTraits.getSortCols(), sourceTraits.getNumReduceSinks()));
    mergeJoinOp.setStatistics(joinOp.getStatistics());

    // Put the merge join into the exact slot the join occupied in each neighbour's list.
    for (Operator<? extends OperatorDesc> upstream : joinOp.getParentOperators()) {
        List<Operator<? extends OperatorDesc>> upstreamChildren = upstream.getChildOperators();
        int slot = upstreamChildren.indexOf(joinOp);
        upstreamChildren.remove(slot);
        upstreamChildren.add(slot, mergeJoinOp);
    }
    for (Operator<? extends OperatorDesc> downstream : joinOp.getChildOperators()) {
        List<Operator<? extends OperatorDesc>> downstreamParents = downstream.getParentOperators();
        int slot = downstreamParents.indexOf(joinOp);
        downstreamParents.remove(slot);
        downstreamParents.add(slot, mergeJoinOp);
    }

    // Mirror the join's own neighbour lists onto the merge join.
    List<Operator<? extends OperatorDesc>> mergeChildren = mergeJoinOp.getChildOperators();
    List<Operator<? extends OperatorDesc>> mergeParents = mergeJoinOp.getParentOperators();
    mergeChildren.clear();
    mergeParents.clear();
    mergeChildren.addAll(joinOp.getChildOperators());
    mergeParents.addAll(joinOp.getParentOperators());

    mergeJoinOp.getConf().setGenJoinKeys(adjustParentsChildren);
    if (adjustParentsChildren) {
        // Bypass the immediate parents: every grandparent now feeds the merge join directly.
        List<Operator<? extends OperatorDesc>> grandParents = new ArrayList<>();
        for (Operator<? extends OperatorDesc> bypassed : mergeParents) {
            for (Operator<? extends OperatorDesc> grandParent : bypassed.getParentOperators()) {
                List<Operator<? extends OperatorDesc>> grandParentChildren = grandParent.getChildOperators();
                grandParentChildren.remove(bypassed);
                grandParentChildren.add(mergeJoinOp);
                grandParents.add(grandParent);
            }
        }
        mergeParents.clear();
        mergeParents.addAll(grandParents);

        // Iterate over a snapshot because the live parent list is edited inside the loop.
        List<Operator<? extends OperatorDesc>> snapshot = new ArrayList<>(mergeParents);
        for (Operator<? extends OperatorDesc> currentParent : snapshot) {
            int parentIndex = mergeParents.indexOf(currentParent);
            if (parentIndex != mapJoinConversionPos) {
                // insert the dummy store operator here
                DummyStoreOperator dummyStoreOp = new TezDummyStoreOperator(mergeJoinOp.getCompilationOpContext());
                dummyStoreOp.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>());
                dummyStoreOp.setChildOperators(new ArrayList<Operator<? extends OperatorDesc>>());
                dummyStoreOp.getChildOperators().add(mergeJoinOp);

                List<Operator<? extends OperatorDesc>> currentParentChildren = currentParent.getChildOperators();
                int childSlot = currentParentChildren.indexOf(mergeJoinOp);
                currentParentChildren.remove(childSlot);
                currentParentChildren.add(childSlot, dummyStoreOp);
                dummyStoreOp.getParentOperators().add(currentParent);

                mergeParents.remove(parentIndex);
                mergeParents.add(parentIndex, dummyStoreOp);
            }
        }
    }
    mergeJoinOp.cloneOriginalParentsList(mergeJoinOp.getParentOperators());
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) MuxOperator(org.apache.hadoop.hive.ql.exec.MuxOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) OpTraits(org.apache.hadoop.hive.ql.plan.OpTraits) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) CommonMergeJoinDesc(org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc) ArrayList(java.util.ArrayList) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) CommonMergeJoinDesc(org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator)

Example 3 with CommonJoinOperator

use of org.apache.hadoop.hive.ql.exec.CommonJoinOperator in project hive by apache.

the class ColumnPrunerProcFactory method pruneJoinOperator.

/**
 * Prunes the output of a join operator down to the columns that are still needed.
 *
 * <p>The needed columns come from {@code genColLists(op)} on the pruning context,
 * extended with the columns referenced by the join's residual filter expressions.
 * Every output column that is not needed has its expression removed from
 * {@code conf.getExprs()} (and the matching entry removed from {@code retainMap}
 * when one is supplied). Needed columns are kept and recorded per input tag, together
 * with the columns used by the join filters and, for map joins, the join keys.
 *
 * <p>On completion the operator's column expression map, the descriptor's output
 * column names and the operator schema signature are replaced with the pruned
 * versions, child {@code ReduceSinkOperator}s are pruned accordingly, and the
 * per-tag column lists are stored in the context's join-pruned-column map. Nothing
 * is changed when {@code genColLists(op)} returns {@code null}.
 *
 * @param ctx processing context; must be a {@code ColumnPrunerProcCtx}
 * @param op join operator being pruned
 * @param conf the join descriptor; must be a {@code MapJoinDesc} when {@code mapJoin} is set
 * @param columnExprMap maps each output column's internal name to its expression
 * @param retainMap per-tag lists kept index-aligned with {@code conf.getExprs()}; may be {@code null}
 * @param mapJoin whether the value table descriptors must be regenerated and the
 *        map join keys added to the per-tag column lists
 * @throws SemanticException declared by the signature; presumably raised by the
 *         pruning helpers
 */
private static void pruneJoinOperator(NodeProcessorCtx ctx, CommonJoinOperator op, JoinDesc conf, Map<String, ExprNodeDesc> columnExprMap, Map<Byte, List<Integer>> retainMap, boolean mapJoin) throws SemanticException {
    ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
    List<Operator<? extends OperatorDesc>> childOperators = op.getChildOperators();
    LOG.info("JOIN " + op.getIdentifier() + " oldExprs: " + conf.getExprs());
    // Resolve the needed columns once and reuse the result for the null check and the copy.
    List<FieldNode> requestedCols = cppCtx.genColLists(op);
    if (requestedCols == null) {
        return;
    }
    List<FieldNode> neededColList = new ArrayList<>(requestedCols);
    Map<Byte, List<FieldNode>> prunedColLists = new HashMap<>();
    for (byte tag : conf.getTagOrder()) {
        prunedColLists.put(tag, new ArrayList<FieldNode>());
    }
    // add the columns in join filters
    for (Map.Entry<Byte, List<ExprNodeDesc>> entry : conf.getFilters().entrySet()) {
        Byte tag = entry.getKey();
        for (ExprNodeDesc desc : entry.getValue()) {
            prunedColLists.put(tag, mergeFieldNodesWithDesc(prunedColLists.get(tag), desc));
        }
    }
    // add the columns in residual filters
    if (conf.getResidualFilterExprs() != null) {
        for (ExprNodeDesc desc : conf.getResidualFilterExprs()) {
            neededColList = mergeFieldNodesWithDesc(neededColList, desc);
        }
    }
    RowSchema joinRS = op.getSchema();
    ArrayList<String> outputCols = new ArrayList<String>();
    ArrayList<ColumnInfo> rs = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> newColExprMap = new HashMap<String, ExprNodeDesc>();
    for (String internalName : conf.getOutputColumnNames()) {
        ExprNodeDesc desc = columnExprMap.get(internalName);
        Byte tag = conf.getReversedExprs().get(internalName);
        if (lookupColumn(neededColList, internalName) == null) {
            // Column is not needed: drop its expression and the aligned retain entry.
            List<ExprNodeDesc> tagExprs = conf.getExprs().get(tag);
            int index = tagExprs.indexOf(desc);
            if (index < 0) {
                continue;
            }
            tagExprs.remove(index);
            if (retainMap != null) {
                // index is an int, so this removes by position, not by value.
                retainMap.get(tag).remove(index);
            }
        } else {
            List<FieldNode> prunedRSList = prunedColLists.get(tag);
            if (prunedRSList == null) {
                prunedRSList = new ArrayList<>();
            }
            prunedColLists.put(tag, mergeFieldNodesWithDesc(prunedRSList, desc));
            outputCols.add(internalName);
            newColExprMap.put(internalName, desc);
        }
    }
    if (mapJoin) {
        MapJoinDesc mapJoinConf = (MapJoinDesc) conf;
        // regenerate the valueTableDesc
        List<TableDesc> valueTableDescs = new ArrayList<TableDesc>();
        for (int pos = 0; pos < op.getParentOperators().size(); pos++) {
            List<ExprNodeDesc> valueCols = conf.getExprs().get(Byte.valueOf((byte) pos));
            TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(PlanUtils.getFieldSchemasFromColumnList(valueCols, "mapjoinvalue"));
            valueTableDescs.add(valueTableDesc);
        }
        mapJoinConf.setValueTblDescs(valueTableDescs);
        // The join keys must survive pruning on every input, so add them per tag.
        for (Map.Entry<Byte, List<ExprNodeDesc>> entry : mapJoinConf.getKeys().entrySet()) {
            Byte tag = entry.getKey();
            for (ExprNodeDesc desc : entry.getValue()) {
                prunedColLists.put(tag, mergeFieldNodesWithDesc(prunedColLists.get(tag), desc));
            }
        }
    }
    for (Operator<? extends OperatorDesc> child : childOperators) {
        if (child instanceof ReduceSinkOperator) {
            boolean[] flags = getPruneReduceSinkOpRetainFlags(toColumnNames(neededColList), (ReduceSinkOperator) child);
            pruneReduceSinkOperator(flags, (ReduceSinkOperator) child, cppCtx);
        }
    }
    // Rebuild the schema signature from the surviving output columns.
    for (String internalName : outputCols) {
        rs.add(joinRS.getColumnInfo(internalName));
    }
    LOG.info("JOIN " + op.getIdentifier() + " newExprs: " + conf.getExprs());
    op.setColumnExprMap(newColExprMap);
    conf.setOutputColumnNames(outputCols);
    op.getSchema().setSignature(rs);
    cppCtx.getJoinPrunedColLists().put(op, prunedColLists);
}
Also used : LateralViewJoinOperator(org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) PTFOperator(org.apache.hadoop.hive.ql.exec.PTFOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) LateralViewForwardOperator(org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) UDTFOperator(org.apache.hadoop.hive.ql.exec.UDTFOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) List(java.util.List) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

CommonJoinOperator (org.apache.hadoop.hive.ql.exec.CommonJoinOperator)3 ArrayList (java.util.ArrayList)2 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)2 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)2 Operator (org.apache.hadoop.hive.ql.exec.Operator)2 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)2 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)2 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)2 MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc)2 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)2 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)1 AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator)1 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)1 CommonMergeJoinOperator (org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator)1 DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator)1 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)1 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)1