Search in sources :

Example 1 with UnionDesc

use of org.apache.hadoop.hive.ql.plan.UnionDesc in project hive by apache.

the class HiveOpConverter method visit.

OpAttr visit(HiveUnion unionRel) throws SemanticException {
    // 1. Convert inputs
    List<RelNode> inputsList = extractRelNodeFromUnion(unionRel);
    OpAttr[] inputs = new OpAttr[inputsList.size()];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = dispatch(inputsList.get(i));
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + unionRel.getId() + ":" + unionRel.getRelTypeName() + " with row type: [" + unionRel.getRowType() + "]");
    }
    // 2. Create a new union operator
    UnionDesc unionDesc = new UnionDesc();
    unionDesc.setNumInputs(inputs.length);
    String tableAlias = getHiveDerivedTableAlias();
    ArrayList<ColumnInfo> cinfoLst = createColInfos(inputs[0].inputs.get(0), tableAlias);
    Operator<?>[] children = new Operator<?>[inputs.length];
    for (int i = 0; i < children.length; i++) {
        if (i == 0) {
            children[i] = inputs[i].inputs.get(0);
        } else {
            Operator<?> op = inputs[i].inputs.get(0);
            // We need to check if the other input branches for union is following the first branch
            // We may need to cast the data types for specific columns.
            children[i] = genInputSelectForUnion(op, cinfoLst);
        }
    }
    Operator<? extends OperatorDesc> unionOp = OperatorFactory.getAndMakeChild(semanticAnalyzer.getOpContext(), unionDesc, new RowSchema(cinfoLst), children);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + unionOp + " with row schema: [" + unionOp.getSchema() + "]");
    }
    // 3. Return result
    return new OpAttr(tableAlias, inputs[0].vcolsInCalcite, unionOp);
}
Also used : UnionDesc(org.apache.hadoop.hive.ql.plan.UnionDesc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) RelNode(org.apache.calcite.rel.RelNode)

Example 2 with UnionDesc

use of org.apache.hadoop.hive.ql.plan.UnionDesc in project hive by apache.

the class SemanticAnalyzer method genUnionPlan.

@SuppressWarnings("nls")
private Operator genUnionPlan(String unionalias, String leftalias, Operator leftOp, String rightalias, Operator rightOp) throws SemanticException {
    // Currently, the unions are not merged - each union has only 2 parents. So,
    // a n-way union will lead to (n-1) union operators.
    // This can be easily merged into 1 union
    RowResolver leftRR = opParseCtx.get(leftOp).getRowResolver();
    RowResolver rightRR = opParseCtx.get(rightOp).getRowResolver();
    LinkedHashMap<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias);
    LinkedHashMap<String, ColumnInfo> rightmap = rightRR.getFieldMap(rightalias);
    // make sure the schemas of both sides are the same
    ASTNode tabref = qb.getAliases().isEmpty() ? null : qb.getParseInfo().getSrcForAlias(qb.getAliases().get(0));
    if (leftmap.size() != rightmap.size()) {
        throw new SemanticException("Schema of both sides of union should match.");
    }
    RowResolver unionoutRR = new RowResolver();
    Iterator<Map.Entry<String, ColumnInfo>> lIter = leftmap.entrySet().iterator();
    Iterator<Map.Entry<String, ColumnInfo>> rIter = rightmap.entrySet().iterator();
    while (lIter.hasNext()) {
        Map.Entry<String, ColumnInfo> lEntry = lIter.next();
        Map.Entry<String, ColumnInfo> rEntry = rIter.next();
        ColumnInfo lInfo = lEntry.getValue();
        ColumnInfo rInfo = rEntry.getValue();
        // use left alias (~mysql, postgresql)
        String field = lEntry.getKey();
        // try widening conversion, otherwise fail union
        TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), rInfo.getType());
        if (commonTypeInfo == null) {
            throw new SemanticException(generateErrorMessage(tabref, "Schema of both sides of union should match: Column " + field + " is of type " + lInfo.getType().getTypeName() + " on first table and type " + rInfo.getType().getTypeName() + " on second table"));
        }
        ColumnInfo unionColInfo = new ColumnInfo(lInfo);
        unionColInfo.setType(commonTypeInfo);
        unionoutRR.put(unionalias, field, unionColInfo);
    }
    // For Spark,TEZ we rely on the generated SelectOperator to do the type casting.
    // Consider:
    //    SEL_1 (int)   SEL_2 (int)    SEL_3 (double)
    // If we first merge SEL_1 and SEL_2 into a UNION_1, and then merge UNION_1
    // with SEL_3 to get UNION_2, then no SelectOperator will be inserted. Hence error
    // will happen afterwards. The solution here is to insert one after UNION_1, which
    // cast int to double.
    boolean isMR = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr");
    if (!isMR || !(leftOp instanceof UnionOperator)) {
        leftOp = genInputSelectForUnion(leftOp, leftmap, leftalias, unionoutRR, unionalias);
    }
    if (!isMR || !(rightOp instanceof UnionOperator)) {
        rightOp = genInputSelectForUnion(rightOp, rightmap, rightalias, unionoutRR, unionalias);
    }
    // else create a new one
    if (leftOp instanceof UnionOperator || (leftOp instanceof SelectOperator && leftOp.getParentOperators() != null && !leftOp.getParentOperators().isEmpty() && leftOp.getParentOperators().get(0) instanceof UnionOperator && ((SelectOperator) leftOp).isIdentitySelect())) {
        if (!(leftOp instanceof UnionOperator)) {
            Operator oldChild = leftOp;
            leftOp = (Operator) leftOp.getParentOperators().get(0);
            leftOp.removeChildAndAdoptItsChildren(oldChild);
        }
        // make left a child of right
        List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
        child.add(leftOp);
        rightOp.setChildOperators(child);
        List<Operator<? extends OperatorDesc>> parent = leftOp.getParentOperators();
        parent.add(rightOp);
        UnionDesc uDesc = ((UnionOperator) leftOp).getConf();
        uDesc.setNumInputs(uDesc.getNumInputs() + 1);
        return putOpInsertMap(leftOp, unionoutRR);
    }
    if (rightOp instanceof UnionOperator || (rightOp instanceof SelectOperator && rightOp.getParentOperators() != null && !rightOp.getParentOperators().isEmpty() && rightOp.getParentOperators().get(0) instanceof UnionOperator && ((SelectOperator) rightOp).isIdentitySelect())) {
        if (!(rightOp instanceof UnionOperator)) {
            Operator oldChild = rightOp;
            rightOp = (Operator) rightOp.getParentOperators().get(0);
            rightOp.removeChildAndAdoptItsChildren(oldChild);
        }
        // make right a child of left
        List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
        child.add(rightOp);
        leftOp.setChildOperators(child);
        List<Operator<? extends OperatorDesc>> parent = rightOp.getParentOperators();
        parent.add(leftOp);
        UnionDesc uDesc = ((UnionOperator) rightOp).getConf();
        uDesc.setNumInputs(uDesc.getNumInputs() + 1);
        return putOpInsertMap(rightOp, unionoutRR);
    }
    // Create a new union operator
    Operator<? extends OperatorDesc> unionforward = OperatorFactory.getAndMakeChild(getOpContext(), new UnionDesc(), new RowSchema(unionoutRR.getColumnInfos()));
    // set union operator as child of each of leftOp and rightOp
    List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
    child.add(unionforward);
    rightOp.setChildOperators(child);
    child = new ArrayList<Operator<? extends OperatorDesc>>();
    child.add(unionforward);
    leftOp.setChildOperators(child);
    List<Operator<? extends OperatorDesc>> parent = new ArrayList<Operator<? extends OperatorDesc>>();
    parent.add(leftOp);
    parent.add(rightOp);
    unionforward.setParentOperators(parent);
    // create operator info list to return
    return putOpInsertMap(unionforward, unionoutRR);
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) UnionDesc(org.apache.hadoop.hive.ql.plan.UnionDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Entry(java.util.Map.Entry) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)

Aggregations

ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)2 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)2 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)2 Operator (org.apache.hadoop.hive.ql.exec.Operator)2 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)2 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)2 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)2 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)2 UnionDesc (org.apache.hadoop.hive.ql.plan.UnionDesc)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1 RelNode (org.apache.calcite.rel.RelNode)1 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)1 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)1 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)1 LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator)1 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)1