Example 26 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

The class LineageLogger, method getEdges.

/**
 * Based on the final select operator, find out all the target columns.
 * For each target column, find out its sources based on the dependency index.
 */
@VisibleForTesting
public static List<Edge> getEdges(QueryPlan plan, Index index) {
    LinkedHashMap<String, ObjectPair<SelectOperator, org.apache.hadoop.hive.ql.metadata.Table>> finalSelOps = index.getFinalSelectOps();
    Map<String, Vertex> vertexCache = new LinkedHashMap<String, Vertex>();
    List<Edge> edges = new ArrayList<Edge>();
    for (ObjectPair<SelectOperator, org.apache.hadoop.hive.ql.metadata.Table> pair : finalSelOps.values()) {
        List<FieldSchema> fieldSchemas = plan.getResultSchema().getFieldSchemas();
        SelectOperator finalSelOp = pair.getFirst();
        org.apache.hadoop.hive.ql.metadata.Table t = pair.getSecond();
        String destTableName = null;
        List<String> colNames = null;
        if (t != null) {
            destTableName = t.getFullyQualifiedName();
            fieldSchemas = t.getCols();
        } else {
            // Based on the plan outputs, find out the target table name and column names.
            for (WriteEntity output : plan.getOutputs()) {
                Entity.Type entityType = output.getType();
                if (entityType == Entity.Type.TABLE || entityType == Entity.Type.PARTITION) {
                    t = output.getTable();
                    destTableName = t.getFullyQualifiedName();
                    List<FieldSchema> cols = t.getCols();
                    if (cols != null && !cols.isEmpty()) {
                        colNames = Utilities.getColumnNamesFromFieldSchema(cols);
                    }
                    break;
                }
            }
        }
        Map<ColumnInfo, Dependency> colMap = index.getDependencies(finalSelOp);
        List<Dependency> dependencies = colMap != null ? Lists.newArrayList(colMap.values()) : null;
        int fields = fieldSchemas.size();
        if (t != null && colMap != null && fields < colMap.size()) {
            // Dynamic partition keys should be added to field schemas.
            List<FieldSchema> partitionKeys = t.getPartitionKeys();
            int dynamicKeyCount = colMap.size() - fields;
            int keyOffset = partitionKeys.size() - dynamicKeyCount;
            if (keyOffset >= 0) {
                fields += dynamicKeyCount;
                for (int i = 0; i < dynamicKeyCount; i++) {
                    FieldSchema field = partitionKeys.get(keyOffset + i);
                    fieldSchemas.add(field);
                    if (colNames != null) {
                        colNames.add(field.getName());
                    }
                }
            }
        }
        if (dependencies == null || dependencies.size() != fields) {
            log("Result schema has " + fields + " fields, but we don't get as many dependencies");
        } else {
            // Go through each target column, generate the lineage edges.
            Set<Vertex> targets = new LinkedHashSet<Vertex>();
            for (int i = 0; i < fields; i++) {
                Vertex target = getOrCreateVertex(vertexCache, getTargetFieldName(i, destTableName, colNames, fieldSchemas), Vertex.Type.COLUMN);
                targets.add(target);
                Dependency dep = dependencies.get(i);
                addEdge(vertexCache, edges, dep.getBaseCols(), target, dep.getExpr(), Edge.Type.PROJECTION);
            }
            Set<Predicate> conds = index.getPredicates(finalSelOp);
            if (conds != null && !conds.isEmpty()) {
                for (Predicate cond : conds) {
                    addEdge(vertexCache, edges, cond.getBaseCols(), new LinkedHashSet<Vertex>(targets), cond.getExpr(), Edge.Type.PREDICATE);
                }
            }
        }
    }
    return edges;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) BaseColumnInfo(org.apache.hadoop.hive.ql.hooks.LineageInfo.BaseColumnInfo) LinkedHashMap(java.util.LinkedHashMap) Predicate(org.apache.hadoop.hive.ql.hooks.LineageInfo.Predicate) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) Table(org.apache.hadoop.hive.metastore.api.Table) Dependency(org.apache.hadoop.hive.ql.hooks.LineageInfo.Dependency) ObjectPair(org.apache.hadoop.hive.common.ObjectPair) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
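For orientation, getEdges above is normally driven from LineageLogger's post-execution hook once the query has run and the lineage Index has been populated. Below is a minimal sketch of calling it directly; obtaining the QueryPlan and Index is assumed to happen elsewhere (e.g. from the hook context), and the Index import path is an assumption inferred from the method signature rather than something shown in this listing.

import java.util.List;

import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.hooks.LineageLogger;
import org.apache.hadoop.hive.ql.hooks.LineageLogger.Edge;
// Assumption: Index is LineageCtx.Index, matching the getEdges signature.
import org.apache.hadoop.hive.ql.optimizer.lineage.LineageCtx.Index;

public class LineageEdgesSketch {

    // Assumes a populated plan and index, e.g. inside a post-execution hook.
    static void dumpEdgeCount(QueryPlan plan, Index index) {
        // getEdges emits one PROJECTION edge per target column; PREDICATE
        // edges fan out from filter columns to the full set of targets.
        List<Edge> edges = LineageLogger.getEdges(plan, index);
        System.out.println("lineage edges: " + edges.size());
    }
}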

Example 27 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

The class SemanticAnalyzer, method genLateralViewPlans.

/**
 * Generates the operator DAG needed to implement lateral views and attaches
 * it to the TS operator.
 *
 * @param aliasToOpInfo
 *          A mapping from a table alias to the TS operator. This function
 *          replaces the operator mapping as necessary
 * @param qb
 * @throws SemanticException
 */
void genLateralViewPlans(Map<String, Operator> aliasToOpInfo, QB qb) throws SemanticException {
    Map<String, ArrayList<ASTNode>> aliasToLateralViews = qb.getParseInfo().getAliasToLateralViews();
    for (Entry<String, Operator> e : aliasToOpInfo.entrySet()) {
        String alias = e.getKey();
        // See if the alias has a lateral view. If so, chain the lateral view
        // operator on
        ArrayList<ASTNode> lateralViews = aliasToLateralViews.get(alias);
        if (lateralViews != null) {
            Operator op = e.getValue();
            for (ASTNode lateralViewTree : aliasToLateralViews.get(alias)) {
                // There are 2 paths from the TS operator (or a previous LVJ operator)
                // to the same LateralViewJoinOperator.
                // TS -> SelectOperator(*) -> LateralViewJoinOperator
                // TS -> SelectOperator (gets cols for UDTF) -> UDTFOperator0
                // -> LateralViewJoinOperator
                // 
                Operator lateralViewJoin = genLateralViewPlan(qb, op, lateralViewTree);
                op = lateralViewJoin;
            }
            e.setValue(op);
        }
    }
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ArrayList(java.util.ArrayList)
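The loop above is a simple left fold: each lateral view AST wraps the operator produced so far, so n lateral views on one alias yield a chain of n LateralViewJoinOperators hanging off the original TS operator. The same rethreading pattern in isolation, with illustrative names that are not Hive API:

import java.util.List;
import java.util.function.BiFunction;

public class ChainSketch {

    // Thread an accumulator through a list, mirroring how `op` is
    // reassigned to each new lateralViewJoin in the loop above.
    static <O, V> O chain(O op, List<V> views, BiFunction<O, V, O> attach) {
        for (V view : views) {
            op = attach.apply(op, view);  // cf. genLateralViewPlan(qb, op, lateralViewTree)
        }
        return op;
    }

    public static void main(String[] args) {
        // Toy usage: wrap a label once per "view".
        String chained = chain("TS", List.of("LV1", "LV2"),
                (o, v) -> "LVJ(" + o + ", " + v + ")");
        System.out.println(chained);  // LVJ(LVJ(TS, LV1), LV2)
    }
}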

Example 28 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

The class SemanticAnalyzer, method genUnionPlan.

@SuppressWarnings("nls")
private Operator genUnionPlan(String unionalias, String leftalias, Operator leftOp, String rightalias, Operator rightOp) throws SemanticException {
    // Currently, unions are not merged and each union has only two parents,
    // so an n-way union leads to (n-1) union operators. These could easily
    // be merged into a single union operator.
    RowResolver leftRR = opParseCtx.get(leftOp).getRowResolver();
    RowResolver rightRR = opParseCtx.get(rightOp).getRowResolver();
    LinkedHashMap<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias);
    LinkedHashMap<String, ColumnInfo> rightmap = rightRR.getFieldMap(rightalias);
    // make sure the schemas of both sides are the same
    ASTNode tabref = qb.getAliases().isEmpty() ? null : qb.getParseInfo().getSrcForAlias(qb.getAliases().get(0));
    if (leftmap.size() != rightmap.size()) {
        throw new SemanticException("Schema of both sides of union should match.");
    }
    RowResolver unionoutRR = new RowResolver();
    Iterator<Map.Entry<String, ColumnInfo>> lIter = leftmap.entrySet().iterator();
    Iterator<Map.Entry<String, ColumnInfo>> rIter = rightmap.entrySet().iterator();
    while (lIter.hasNext()) {
        Map.Entry<String, ColumnInfo> lEntry = lIter.next();
        Map.Entry<String, ColumnInfo> rEntry = rIter.next();
        ColumnInfo lInfo = lEntry.getValue();
        ColumnInfo rInfo = rEntry.getValue();
        // use the left alias for the output column name (matching MySQL and PostgreSQL)
        String field = lEntry.getKey();
        // try widening conversion, otherwise fail union
        TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), rInfo.getType());
        if (commonTypeInfo == null) {
            throw new SemanticException(generateErrorMessage(tabref, "Schema of both sides of union should match: Column " + field + " is of type " + lInfo.getType().getTypeName() + " on first table and type " + rInfo.getType().getTypeName() + " on second table"));
        }
        ColumnInfo unionColInfo = new ColumnInfo(lInfo);
        unionColInfo.setType(commonTypeInfo);
        unionoutRR.put(unionalias, field, unionColInfo);
    }
    // For Spark and Tez we rely on the generated SelectOperator to do the type casting.
    // Consider:
    // SEL_1 (int)   SEL_2 (int)    SEL_3 (double)
    // If we first merge SEL_1 and SEL_2 into UNION_1 and then merge UNION_1
    // with SEL_3 to get UNION_2, no SelectOperator would be inserted and an
    // error would occur afterwards. The solution is to insert one after
    // UNION_1 that casts int to double.
    boolean isMR = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr");
    if (!isMR || !(leftOp instanceof UnionOperator)) {
        leftOp = genInputSelectForUnion(leftOp, leftmap, leftalias, unionoutRR, unionalias);
    }
    if (!isMR || !(rightOp instanceof UnionOperator)) {
        rightOp = genInputSelectForUnion(rightOp, rightmap, rightalias, unionoutRR, unionalias);
    }
    // If either side is already a union (possibly behind an identity select),
    // merge into it; otherwise create a new union operator below.
    if (leftOp instanceof UnionOperator || (leftOp instanceof SelectOperator && leftOp.getParentOperators() != null && !leftOp.getParentOperators().isEmpty() && leftOp.getParentOperators().get(0) instanceof UnionOperator && ((SelectOperator) leftOp).isIdentitySelect())) {
        if (!(leftOp instanceof UnionOperator)) {
            Operator oldChild = leftOp;
            leftOp = (Operator) leftOp.getParentOperators().get(0);
            leftOp.removeChildAndAdoptItsChildren(oldChild);
        }
        // make left a child of right
        List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
        child.add(leftOp);
        rightOp.setChildOperators(child);
        List<Operator<? extends OperatorDesc>> parent = leftOp.getParentOperators();
        parent.add(rightOp);
        UnionDesc uDesc = ((UnionOperator) leftOp).getConf();
        uDesc.setNumInputs(uDesc.getNumInputs() + 1);
        return putOpInsertMap(leftOp, unionoutRR);
    }
    if (rightOp instanceof UnionOperator || (rightOp instanceof SelectOperator && rightOp.getParentOperators() != null && !rightOp.getParentOperators().isEmpty() && rightOp.getParentOperators().get(0) instanceof UnionOperator && ((SelectOperator) rightOp).isIdentitySelect())) {
        if (!(rightOp instanceof UnionOperator)) {
            Operator oldChild = rightOp;
            rightOp = (Operator) rightOp.getParentOperators().get(0);
            rightOp.removeChildAndAdoptItsChildren(oldChild);
        }
        // make right a child of left
        List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
        child.add(rightOp);
        leftOp.setChildOperators(child);
        List<Operator<? extends OperatorDesc>> parent = rightOp.getParentOperators();
        parent.add(leftOp);
        UnionDesc uDesc = ((UnionOperator) rightOp).getConf();
        uDesc.setNumInputs(uDesc.getNumInputs() + 1);
        return putOpInsertMap(rightOp, unionoutRR);
    }
    // Create a new union operator
    Operator<? extends OperatorDesc> unionforward = OperatorFactory.getAndMakeChild(getOpContext(), new UnionDesc(), new RowSchema(unionoutRR.getColumnInfos()));
    // set union operator as child of each of leftOp and rightOp
    List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
    child.add(unionforward);
    rightOp.setChildOperators(child);
    child = new ArrayList<Operator<? extends OperatorDesc>>();
    child.add(unionforward);
    leftOp.setChildOperators(child);
    List<Operator<? extends OperatorDesc>> parent = new ArrayList<Operator<? extends OperatorDesc>>();
    parent.add(leftOp);
    parent.add(rightOp);
    unionforward.setParentOperators(parent);
    // create operator info list to return
    return putOpInsertMap(unionforward, unionoutRR);
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) UnionDesc(org.apache.hadoop.hive.ql.plan.UnionDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Entry(java.util.Map.Entry) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
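The widening rule in the middle of the method can be checked in isolation: getCommonClassForUnionAll returns the common type for the two sides, or null when no widening conversion exists, which genUnionPlan converts into the SemanticException above. A small sketch; the printed result is an expectation, not verified output:

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnionWideningSketch {

    public static void main(String[] args) {
        // int UNION ALL double should widen to double; sides with no
        // common type yield null and the union is rejected.
        TypeInfo common = FunctionRegistry.getCommonClassForUnionAll(
                TypeInfoFactory.intTypeInfo, TypeInfoFactory.doubleTypeInfo);
        System.out.println(common);  // expected: double
    }
}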

Example 29 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

The class SemanticAnalyzer, method genPlan.

@SuppressWarnings("nls")
public Operator genPlan(QB qb, boolean skipAmbiguityCheck) throws SemanticException {
    // First generate all the opInfos for the elements in the from clause
    // Must be deterministic order map - see HIVE-8707
    Map<String, Operator> aliasToOpInfo = new LinkedHashMap<String, Operator>();
    // Recurse over the subqueries to fill the subquery part of the plan
    for (String alias : qb.getSubqAliases()) {
        QBExpr qbexpr = qb.getSubqForAlias(alias);
        Operator<?> operator = genPlan(qb, qbexpr);
        aliasToOpInfo.put(alias, operator);
        if (qb.getViewToTabSchema().containsKey(alias)) {
            // we set viewProjectToTableSchema so that we can leverage ColumnPruner.
            if (operator instanceof LimitOperator) {
                // This can happen when the view definition contains a LIMIT
                // clause; fetch the parent operator instead.
                operator = operator.getParentOperators().get(0);
            }
            if (operator instanceof SelectOperator) {
                if (this.viewProjectToTableSchema == null) {
                    this.viewProjectToTableSchema = new LinkedHashMap<>();
                }
                viewProjectToTableSchema.put((SelectOperator) operator, qb.getViewToTabSchema().get(alias));
            } else {
                throw new SemanticException("View " + alias + " is corresponding to " + operator.getType().name() + ", rather than a SelectOperator.");
            }
        }
    }
    // Recurse over all the source tables
    for (String alias : qb.getTabAliases()) {
        if (alias.equals(DUMMY_TABLE)) {
            continue;
        }
        Operator op = genTablePlan(alias, qb);
        aliasToOpInfo.put(alias, op);
    }
    if (aliasToOpInfo.isEmpty()) {
        qb.getMetaData().setSrcForAlias(DUMMY_TABLE, getDummyTable());
        TableScanOperator op = (TableScanOperator) genTablePlan(DUMMY_TABLE, qb);
        op.getConf().setRowLimit(1);
        qb.addAlias(DUMMY_TABLE);
        qb.setTabAlias(DUMMY_TABLE, DUMMY_TABLE);
        aliasToOpInfo.put(DUMMY_TABLE, op);
    }
    Operator srcOpInfo = null;
    Operator lastPTFOp = null;
    if (queryProperties.hasPTF()) {
        // After processing subqueries and source tables, process
        // partitioned table functions
        HashMap<ASTNode, PTFInvocationSpec> ptfNodeToSpec = qb.getPTFNodeToSpec();
        if (ptfNodeToSpec != null) {
            for (Entry<ASTNode, PTFInvocationSpec> entry : ptfNodeToSpec.entrySet()) {
                ASTNode ast = entry.getKey();
                PTFInvocationSpec spec = entry.getValue();
                String inputAlias = spec.getQueryInputName();
                Operator inOp = aliasToOpInfo.get(inputAlias);
                if (inOp == null) {
                    throw new SemanticException(generateErrorMessage(ast, "Cannot resolve input Operator for PTF invocation"));
                }
                lastPTFOp = genPTFPlan(spec, inOp);
                String ptfAlias = spec.getFunction().getAlias();
                if (ptfAlias != null) {
                    aliasToOpInfo.put(ptfAlias, lastPTFOp);
                }
            }
        }
    }
    // For all the source tables that have a lateral view, attach the
    // appropriate operators to the TS
    genLateralViewPlans(aliasToOpInfo, qb);
    // process join
    if (qb.getParseInfo().getJoinExpr() != null) {
        ASTNode joinExpr = qb.getParseInfo().getJoinExpr();
        if (joinExpr.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) {
            QBJoinTree joinTree = genUniqueJoinTree(qb, joinExpr, aliasToOpInfo);
            qb.setQbJoinTree(joinTree);
        } else {
            QBJoinTree joinTree = genJoinTree(qb, joinExpr, aliasToOpInfo);
            qb.setQbJoinTree(joinTree);
            /*
         * if there is only one destination in Query try to push where predicates
         * as Join conditions
         */
            Set<String> dests = qb.getParseInfo().getClauseNames();
            if (dests.size() == 1 && joinTree.getNoOuterJoin()) {
                String dest = dests.iterator().next();
                ASTNode whereClause = qb.getParseInfo().getWhrForClause(dest);
                if (whereClause != null) {
                    extractJoinCondsFromWhereClause(joinTree, qb, dest, (ASTNode) whereClause.getChild(0), aliasToOpInfo);
                }
            }
            if (!disableJoinMerge) {
                mergeJoinTree(qb);
            }
        }
        // if any filters are present in the join tree, push them on top of the
        // table
        pushJoinFilters(qb, qb.getQbJoinTree(), aliasToOpInfo);
        srcOpInfo = genJoinPlan(qb, aliasToOpInfo);
    } else {
        // If there is more than one source, we have a join case;
        // later we can extend this to the union all case as well
        srcOpInfo = aliasToOpInfo.values().iterator().next();
        // With PTFs there may be more (note for PTF chains:
        // one PTF invocation may entail multiple PTF operators)
        srcOpInfo = lastPTFOp != null ? lastPTFOp : srcOpInfo;
    }
    Operator bodyOpInfo = genBodyPlan(qb, srcOpInfo, aliasToOpInfo);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Created Plan for Query Block " + qb.getId());
    }
    if (qb.getAlias() != null) {
        rewriteRRForSubQ(qb.getAlias(), bodyOpInfo, skipAmbiguityCheck);
    }
    setQB(qb);
    return bodyOpInfo;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LinkedHashMap(java.util.LinkedHashMap) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
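One subtle point above: aliasToOpInfo must be a deterministic-order map (the HIVE-8707 comment), because in the no-join branch genPlan takes aliasToOpInfo.values().iterator().next(), i.e. whatever the map happens to yield first. A self-contained illustration of why LinkedHashMap is used rather than HashMap:

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

public class DeterministicOrderSketch {

    public static void main(String[] args) {
        Map<String, Integer> hashed = new HashMap<>();
        Map<String, Integer> linked = new LinkedHashMap<>();
        for (String alias : new String[] {"t1", "src", "a", "b2"}) {
            hashed.put(alias, alias.length());
            linked.put(alias, alias.length());
        }
        // LinkedHashMap preserves insertion order, so "the first value"
        // is stable across runs; HashMap's order depends on hash buckets.
        System.out.println(hashed.keySet());  // order unspecified
        System.out.println(linked.keySet());  // [t1, src, a, b2]
    }
}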

Example 30 with SelectOperator

Use of org.apache.hadoop.hive.ql.exec.SelectOperator in project hive by apache.

The class TezCompiler, method removeSemiJoinCyclesDueToMapsideJoins.

private static void removeSemiJoinCyclesDueToMapsideJoins(OptimizeTezProcContext procCtx) throws SemanticException {
    if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) || procCtx.parseContext.getRsToSemiJoinBranchInfo().size() == 0) {
        return;
    }
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("R1", MapJoinOperator.getOperatorName() + "%" + MapJoinOperator.getOperatorName() + "%"), new SemiJoinCycleRemovalDueToMapsideJoins());
    opRules.put(new RuleRegExp("R2", MapJoinOperator.getOperatorName() + "%" + CommonMergeJoinOperator.getOperatorName() + "%"), new SemiJoinCycleRemovalDueToMapsideJoins());
    opRules.put(new RuleRegExp("R3", CommonMergeJoinOperator.getOperatorName() + "%" + MapJoinOperator.getOperatorName() + "%"), new SemiJoinCycleRemovalDueToMapsideJoins());
    opRules.put(new RuleRegExp("R4", CommonMergeJoinOperator.getOperatorName() + "%" + CommonMergeJoinOperator.getOperatorName() + "%"), new SemiJoinCycleRemovalDueToMapsideJoins());
    SemiJoinCycleRemovalDueTOMapsideJoinContext ctx = new SemiJoinCycleRemovalDueTOMapsideJoinContext();
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    GraphWalker ogw = new PreOrderOnceWalker(disp);
    ogw.startWalking(topNodes, null);
    // Process the parent/child join pairs collected during the walk.
    ParseContext pCtx = procCtx.parseContext;
    for (Operator<?> parentJoin : ctx.childParentMap.keySet()) {
        Operator<?> childJoin = ctx.childParentMap.get(parentJoin);
        if (parentJoin.getChildOperators().size() == 1) {
            continue;
        }
        for (Operator<?> child : parentJoin.getChildOperators()) {
            if (!(child instanceof SelectOperator)) {
                continue;
            }
            while (child.getChildOperators().size() > 0) {
                child = child.getChildOperators().get(0);
            }
            if (!(child instanceof ReduceSinkOperator)) {
                continue;
            }
            ReduceSinkOperator rs = ((ReduceSinkOperator) child);
            SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs);
            if (sjInfo == null) {
                continue;
            }
            TableScanOperator ts = sjInfo.getTsOp();
            // Check whether any other parent branch of childJoin traces back
            // to ts, creating a cycle with childJoin.
            for (Operator<?> parent : childJoin.getParentOperators()) {
                if (parent == parentJoin) {
                    continue;
                }
                assert parent instanceof ReduceSinkOperator;
                while (parent.getParentOperators().size() > 0) {
                    parent = parent.getParentOperators().get(0);
                }
                if (parent == ts) {
                    // We have a cycle!
                    if (sjInfo.getIsHint()) {
                        throw new SemanticException("Removing hinted semijoin as it is creating cycles with mapside joins " + rs + " : " + ts);
                    }
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Semijoin cycle due to mapjoin. Removing semijoin " + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
                    }
                    GenTezUtils.removeBranch(rs);
                    GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts);
                }
            }
        }
    }
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) PreOrderOnceWalker(org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)
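The walk above uses Hive's generic rule/dispatcher machinery: each RuleRegExp matches a sequence of operator names on the walker's stack, and the NodeProcessor mapped to the matching rule fires. A minimal sketch of the same wiring with just rule R1; the empty processor body and the null default processor and context are placeholders:

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class RuleWalkSketch {

    // Fires when a MapJoin's immediate child is another MapJoin (rule R1 above).
    static final NodeProcessor PROC = new NodeProcessor() {
        @Override
        public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
                Object... nodeOutputs) throws SemanticException {
            // Placeholder: record or rewrite the matched operator pair here.
            return null;
        }
    };

    static void walk(List<Node> topNodes) throws SemanticException {
        Map<Rule, NodeProcessor> opRules = new LinkedHashMap<>();
        opRules.put(new RuleRegExp("R1", MapJoinOperator.getOperatorName() + "%"
                + MapJoinOperator.getOperatorName() + "%"), PROC);
        Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
        GraphWalker ogw = new PreOrderOnceWalker(disp);
        ogw.startWalking(topNodes, null);
    }
}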

Aggregations

SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 31
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 20
ArrayList (java.util.ArrayList): 14
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 13
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 12
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 11
SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc): 10
HashMap (java.util.HashMap): 8
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 8
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 8
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 8
LinkedHashMap (java.util.LinkedHashMap): 7
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 7
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 7
Operator (org.apache.hadoop.hive.ql.exec.Operator): 7
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 7
Test (org.junit.Test): 7
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 6
ExprNodeFieldDesc (org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc): 6
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 4