
Example 66 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class SemanticAnalyzer method genUDTFPlan.

private Operator genUDTFPlan(GenericUDTF genericUDTF, String outputTableAlias, ArrayList<String> colAliases, QB qb, Operator input, boolean outerLV) throws SemanticException {
    // No GROUP BY / DISTRIBUTE BY / SORT BY / CLUSTER BY
    QBParseInfo qbp = qb.getParseInfo();
    if (!qbp.getDestToGroupBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_GROUP_BY.getMsg());
    }
    if (!qbp.getDestToDistributeBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_DISTRIBUTE_BY.getMsg());
    }
    if (!qbp.getDestToSortBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_SORT_BY.getMsg());
    }
    if (!qbp.getDestToClusterBy().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_NO_CLUSTER_BY.getMsg());
    }
    if (!qbp.getAliasToLateralViews().isEmpty()) {
        throw new SemanticException(ErrorMsg.UDTF_LATERAL_VIEW.getMsg());
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Table alias: " + outputTableAlias + " Col aliases: " + colAliases);
    }
    // Use the RowResolver from the input operator to generate an input
    // ObjectInspector that can be used to initialize the UDTF. Then, the
    // resulting output object inspector can be used to make the RowResolver
    // for the UDTF operator
    RowResolver selectRR = opParseCtx.get(input).getRowResolver();
    ArrayList<ColumnInfo> inputCols = selectRR.getColumnInfos();
    // Create the object inspector for the input columns and initialize the UDTF
    ArrayList<String> colNames = new ArrayList<String>();
    ObjectInspector[] colOIs = new ObjectInspector[inputCols.size()];
    for (int i = 0; i < inputCols.size(); i++) {
        colNames.add(inputCols.get(i).getInternalName());
        colOIs[i] = inputCols.get(i).getObjectInspector();
    }
    StandardStructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, Arrays.asList(colOIs));
    StructObjectInspector outputOI = genericUDTF.initialize(rowOI);
    int numUdtfCols = outputOI.getAllStructFieldRefs().size();
    if (colAliases.isEmpty()) {
        // user did not specify alias names, infer them from outputOI
        for (StructField field : outputOI.getAllStructFieldRefs()) {
            colAliases.add(field.getFieldName());
        }
    }
    // Make sure that the number of column aliases in the AS clause matches
    // the number of columns output by the UDTF
    int numSuppliedAliases = colAliases.size();
    if (numUdtfCols != numSuppliedAliases) {
        throw new SemanticException(ErrorMsg.UDTF_ALIAS_MISMATCH.getMsg("expected " + numUdtfCols + " aliases " + "but got " + numSuppliedAliases));
    }
    // Generate the output ColumnInfos / row resolver using internal names.
    ArrayList<ColumnInfo> udtfCols = new ArrayList<ColumnInfo>();
    Iterator<String> colAliasesIter = colAliases.iterator();
    for (StructField sf : outputOI.getAllStructFieldRefs()) {
        String colAlias = colAliasesIter.next();
        assert (colAlias != null);
        // Since the UDTF operator feeds into a LVJ operator that will rename
        // all the internal names, we can just use field name from the UDTF's OI
        // as the internal name
        ColumnInfo col = new ColumnInfo(sf.getFieldName(), TypeInfoUtils.getTypeInfoFromObjectInspector(sf.getFieldObjectInspector()), outputTableAlias, false);
        udtfCols.add(col);
    }
    // Create the row resolver for this operator from the output columns
    RowResolver out_rwsch = new RowResolver();
    for (int i = 0; i < udtfCols.size(); i++) {
        out_rwsch.put(outputTableAlias, colAliases.get(i), udtfCols.get(i));
    }
    // Add the UDTFOperator to the operator DAG
    Operator<?> udtf = putOpInsertMap(OperatorFactory.getAndMakeChild(new UDTFDesc(genericUDTF, outerLV), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
    return udtf;
}
Also used : StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) UDTFDesc(org.apache.hadoop.hive.ql.plan.UDTFDesc) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
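The key step above is initializing the UDTF with a struct ObjectInspector assembled from the input columns and reading the output schema back from the returned StructObjectInspector. The following standalone sketch reproduces that handshake with GenericUDTFExplode; the column name "_col0" and its array-of-string type are assumptions for illustration, not taken from the example.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class UdtfInitSketch {
    public static void main(String[] args) throws Exception {
        // One assumed input column "_col0" of type array<string>, wrapped in a struct OI
        // the same way genUDTFPlan assembles rowOI from the input RowResolver.
        List<String> colNames = Arrays.asList("_col0");
        List<ObjectInspector> colOIs = Arrays.asList(
                (ObjectInspector) ObjectInspectorFactory.getStandardListObjectInspector(
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector));
        StandardStructObjectInspector rowOI =
                ObjectInspectorFactory.getStandardStructObjectInspector(colNames, colOIs);
        // Initialize the UDTF; when no aliases are supplied, the output OI's field names
        // become the column aliases, as in the snippet above.
        GenericUDTF explode = new GenericUDTFExplode();
        StructObjectInspector outputOI = explode.initialize(rowOI);
        for (StructField field : outputOI.getAllStructFieldRefs()) {
            System.out.println(field.getFieldName());
        }
    }
}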

Example 67 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class SemanticAnalyzer method genMapGroupByForSemijoin.

private Operator genMapGroupByForSemijoin(QB qb, ArrayList<ASTNode> fields, // the join key ASTNodes, e.g. "tab.col"
        Operator inputOperatorInfo, GroupByDesc.Mode mode) throws SemanticException {
    RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo).getRowResolver();
    RowResolver groupByOutputRowResolver = new RowResolver();
    ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    qb.getParseInfo();
    // join keys should only
    groupByOutputRowResolver.setIsExprResolver(true);
    for (int i = 0; i < fields.size(); ++i) {
        // get the group by keys to ColumnInfo
        ASTNode colName = fields.get(i);
        ExprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver);
        groupByKeys.add(grpByExprNode);
        // generate output column names
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false);
        groupByOutputRowResolver.putExpression(colName, colInfo2);
        // establish mapping from the output column to the input column
        colExprMap.put(field, grpByExprNode);
    }
    // Generate group-by operator
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, false, groupByMemoryUsage, memoryThreshold, null, false, -1, false), new RowSchema(groupByOutputRowResolver.getColumnInfos()), inputOperatorInfo), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)
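For reference, the colExprMap built in the loop above simply binds each generated internal name to the join-key expression it stands for. A minimal sketch, assuming a hypothetical key column t.key of type string:

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SemijoinKeyMapSketch {
    public static void main(String[] args) {
        // Hypothetical join key "t.key" registered under the generated internal name "_col0",
        // mirroring the output-column-to-input-expression mapping in the loop above.
        ExprNodeDesc joinKey =
                new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "key", "t", false);
        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
        colExprMap.put("_col0", joinKey);
        System.out.println(colExprMap.keySet());
    }
}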

Example 68 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class SemanticAnalyzer method genPTFPlanForComponentQuery.

private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operator input) throws SemanticException {
    /*
     * 1. Create the PTFDesc from the Qspec attached to this QB.
     */
    RowResolver rr = opParseCtx.get(input).getRowResolver();
    PTFDesc ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
    /*
     * 2. build Map-side Op Graph. Graph template is either:
     * Input -> PTF_map -> ReduceSink
     * or
     * Input -> ReduceSink
     *
     * Here the ExprNodeDescriptors in the QueryDef are based on the Input Operator's RR.
     */
    {
        PartitionedTableFunctionDef tabDef = ptfDesc.getStartOfChain();
        /*
       * a. add Map-side PTF Operator if needed
       */
        if (tabDef.isTransformsRawInput()) {
            RowResolver ptfMapRR = tabDef.getRawInputShape().getRr();
            ptfDesc.setMapSide(true);
            input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfMapRR.getColumnInfos()), input), ptfMapRR);
            rr = opParseCtx.get(input).getRowResolver();
        }
        /*
       * b. Build Reduce Sink Details (keyCols, valueCols, outColNames etc.) for this ptfDesc.
       */
        ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
        ArrayList<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>();
        StringBuilder orderString = new StringBuilder();
        StringBuilder nullOrderString = new StringBuilder();
        /*
       * Use the input RR of TableScanOperator in case there is no map-side
       * reshape of input.
       * If the parent of ReduceSinkOperator is PTFOperator, use its
       * output RR.
       */
        buildPTFReduceSinkDetails(tabDef, rr, partCols, orderCols, orderString, nullOrderString);
        input = genReduceSinkPlan(input, partCols, orderCols, orderString.toString(), nullOrderString.toString(), -1, Operation.NOT_ACID);
    }
    /*
     * 3. build Reduce-side Op Graph
     */
    {
        /*
       * c. Rebuilt the QueryDef.
       * Why?
       * - so that the ExprNodeDescriptors in the QueryDef are based on the
       *   Select Operator's RowResolver
       */
        rr = opParseCtx.get(input).getRowResolver();
        ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
        /*
       * d. Construct PTF Operator.
       */
        RowResolver ptfOpRR = ptfDesc.getFuncDef().getOutputShape().getRr();
        input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc, new RowSchema(ptfOpRR.getColumnInfos()), input), ptfOpRR);
    }
    return input;
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) PTFDesc(org.apache.hadoop.hive.ql.plan.PTFDesc) ArrayList(java.util.ArrayList) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef)
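The orderString and nullOrderString builders that buildPTFReduceSinkDetails fills in encode one character per ReduceSink key. A small sketch of that encoding, assuming Hive's usual convention of '+'/'-' for ascending/descending and 'a'/'z' for nulls-first/nulls-last:

public class PtfOrderStringSketch {
    public static void main(String[] args) {
        StringBuilder orderString = new StringBuilder();
        StringBuilder nullOrderString = new StringBuilder();
        // key 0 ascending with nulls first, key 1 descending with nulls last (assumed convention)
        orderString.append('+').append('-');
        nullOrderString.append('a').append('z');
        System.out.println(orderString + " / " + nullOrderString);  // "+- / az"
    }
}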

Example 69 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class SemanticAnalyzer method genReduceSinkPlan.

@SuppressWarnings("nls")
private Operator genReduceSinkPlan(Operator<?> input, ArrayList<ExprNodeDesc> partitionCols, ArrayList<ExprNodeDesc> sortCols, String sortOrder, String nullOrder, int numReducers, AcidUtils.Operation acidOp, boolean pullConstants) throws SemanticException {
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    Operator dummy = Operator.createDummy();
    dummy.setParentOperators(Arrays.asList(input));
    ArrayList<ExprNodeDesc> newSortCols = new ArrayList<ExprNodeDesc>();
    StringBuilder newSortOrder = new StringBuilder();
    StringBuilder newNullOrder = new StringBuilder();
    ArrayList<ExprNodeDesc> sortColsBack = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < sortCols.size(); i++) {
        ExprNodeDesc sortCol = sortCols.get(i);
        // we are pulling constants but this is not a constant
        if (!pullConstants || !(sortCol instanceof ExprNodeConstantDesc)) {
            newSortCols.add(sortCol);
            newSortOrder.append(sortOrder.charAt(i));
            newNullOrder.append(nullOrder.charAt(i));
            sortColsBack.add(ExprNodeDescUtils.backtrack(sortCol, dummy, input));
        }
    }
    // For the generation of the values expression just get the inputs
    // signature and generate field expressions for those
    RowResolver rsRR = new RowResolver();
    ArrayList<String> outputColumns = new ArrayList<String>();
    ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> valueColsBack = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    ArrayList<ExprNodeDesc> constantCols = new ArrayList<ExprNodeDesc>();
    ArrayList<ColumnInfo> columnInfos = inputRR.getColumnInfos();
    int[] index = new int[columnInfos.size()];
    for (int i = 0; i < index.length; i++) {
        ColumnInfo colInfo = columnInfos.get(i);
        String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
        ExprNodeColumnDesc value = new ExprNodeColumnDesc(colInfo);
        // backtrack can be null when input is script operator
        ExprNodeDesc valueBack = ExprNodeDescUtils.backtrack(value, dummy, input);
        if (pullConstants && valueBack instanceof ExprNodeConstantDesc) {
            // ignore, it will be generated by SEL op
            index[i] = Integer.MAX_VALUE;
            constantCols.add(valueBack);
            continue;
        }
        int kindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, sortColsBack);
        if (kindex >= 0) {
            index[i] = kindex;
            ColumnInfo newColInfo = new ColumnInfo(colInfo);
            newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
            newColInfo.setTabAlias(nm[0]);
            rsRR.put(nm[0], nm[1], newColInfo);
            if (nm2 != null) {
                rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
            }
            continue;
        }
        int vindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, valueColsBack);
        if (vindex >= 0) {
            index[i] = -vindex - 1;
            continue;
        }
        index[i] = -valueCols.size() - 1;
        String outputColName = getColumnInternalName(valueCols.size());
        valueCols.add(value);
        valueColsBack.add(valueBack);
        ColumnInfo newColInfo = new ColumnInfo(colInfo);
        newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
        newColInfo.setTabAlias(nm[0]);
        rsRR.put(nm[0], nm[1], newColInfo);
        if (nm2 != null) {
            rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
        }
        outputColumns.add(outputColName);
    }
    dummy.setParentOperators(null);
    ReduceSinkDesc rsdesc = PlanUtils.getReduceSinkDesc(newSortCols, valueCols, outputColumns, false, -1, partitionCols, newSortOrder.toString(), newNullOrder.toString(), numReducers, acidOp);
    Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(rsdesc, new RowSchema(rsRR.getColumnInfos()), input), rsRR);
    List<String> keyColNames = rsdesc.getOutputKeyColumnNames();
    for (int i = 0; i < keyColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), sortCols.get(i));
    }
    List<String> valueColNames = rsdesc.getOutputValueColumnNames();
    for (int i = 0; i < valueColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.VALUE + "." + valueColNames.get(i), valueCols.get(i));
    }
    interim.setColumnExprMap(colExprMap);
    RowResolver selectRR = new RowResolver();
    ArrayList<ExprNodeDesc> selCols = new ArrayList<ExprNodeDesc>();
    ArrayList<String> selOutputCols = new ArrayList<String>();
    Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>();
    Iterator<ExprNodeDesc> constants = constantCols.iterator();
    for (int i = 0; i < index.length; i++) {
        ColumnInfo prev = columnInfos.get(i);
        String[] nm = inputRR.reverseLookup(prev.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(prev.getInternalName());
        ColumnInfo info = new ColumnInfo(prev);
        ExprNodeDesc desc;
        if (index[i] == Integer.MAX_VALUE) {
            desc = constants.next();
        } else {
            String field;
            if (index[i] >= 0) {
                field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
            } else {
                field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1);
            }
            desc = new ExprNodeColumnDesc(info.getType(), field, info.getTabAlias(), info.getIsVirtualCol());
        }
        selCols.add(desc);
        String internalName = getColumnInternalName(i);
        info.setInternalName(internalName);
        selectRR.put(nm[0], nm[1], info);
        if (nm2 != null) {
            selectRR.addMappingOnly(nm2[0], nm2[1], info);
        }
        selOutputCols.add(internalName);
        selColExprMap.put(internalName, desc);
    }
    SelectDesc select = new SelectDesc(selCols, selOutputCols);
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(select, new RowSchema(selectRR.getColumnInfos()), interim), selectRR);
    output.setColumnExprMap(selColExprMap);
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
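The internal names assigned in the loop above follow the KEY/VALUE prefix scheme from Utilities.ReduceField, which is also what the trailing SelectOperator uses to read columns back out of the ReduceSink output. A tiny sketch of that naming (the "_col0" value name is illustrative):

import org.apache.hadoop.hive.ql.exec.Utilities;

public class ReduceSinkNamingSketch {
    public static void main(String[] args) {
        // Key columns are renamed KEY.reducesinkkeyN, value columns VALUE.<internal name>,
        // matching the ColumnInfo renames and colExprMap entries in genReduceSinkPlan above.
        String keyName = Utilities.ReduceField.KEY + ".reducesinkkey" + 0;
        String valueName = Utilities.ReduceField.VALUE + "." + "_col0";
        System.out.println(keyName + ", " + valueName);  // KEY.reducesinkkey0, VALUE._col0
    }
}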

Example 70 with RowSchema

use of org.apache.hadoop.hive.ql.exec.RowSchema in project hive by apache.

the class SemanticAnalyzer method genUnionPlan.

@SuppressWarnings("nls")
private Operator genUnionPlan(String unionalias, String leftalias, Operator leftOp, String rightalias, Operator rightOp) throws SemanticException {
    // Currently, the unions are not merged - each union has only 2 parents. So,
    // an n-way union will lead to (n-1) union operators.
    // This can be easily merged into 1 union
    RowResolver leftRR = opParseCtx.get(leftOp).getRowResolver();
    RowResolver rightRR = opParseCtx.get(rightOp).getRowResolver();
    LinkedHashMap<String, ColumnInfo> leftmap = leftRR.getFieldMap(leftalias);
    LinkedHashMap<String, ColumnInfo> rightmap = rightRR.getFieldMap(rightalias);
    // make sure the schemas of both sides are the same
    ASTNode tabref = qb.getAliases().isEmpty() ? null : qb.getParseInfo().getSrcForAlias(qb.getAliases().get(0));
    if (leftmap.size() != rightmap.size()) {
        throw new SemanticException("Schema of both sides of union should match.");
    }
    RowResolver unionoutRR = new RowResolver();
    Iterator<Map.Entry<String, ColumnInfo>> lIter = leftmap.entrySet().iterator();
    Iterator<Map.Entry<String, ColumnInfo>> rIter = rightmap.entrySet().iterator();
    while (lIter.hasNext()) {
        Map.Entry<String, ColumnInfo> lEntry = lIter.next();
        Map.Entry<String, ColumnInfo> rEntry = rIter.next();
        ColumnInfo lInfo = lEntry.getValue();
        ColumnInfo rInfo = rEntry.getValue();
        // use left alias (~mysql, postgresql)
        String field = lEntry.getKey();
        // try widening conversion, otherwise fail union
        TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), rInfo.getType());
        if (commonTypeInfo == null) {
            throw new SemanticException(generateErrorMessage(tabref, "Schema of both sides of union should match: Column " + field + " is of type " + lInfo.getType().getTypeName() + " on first table and type " + rInfo.getType().getTypeName() + " on second table"));
        }
        ColumnInfo unionColInfo = new ColumnInfo(lInfo);
        unionColInfo.setType(commonTypeInfo);
        unionoutRR.put(unionalias, field, unionColInfo);
    }
    // For Spark/Tez we rely on the generated SelectOperator to do the type casting.
    // Consider:
    //    SEL_1 (int)   SEL_2 (int)    SEL_3 (double)
    // If we first merge SEL_1 and SEL_2 into UNION_1, and then merge UNION_1
    // with SEL_3 to get UNION_2, then no SelectOperator will be inserted. Hence an error
    // will occur afterwards. The solution here is to insert one after UNION_1, which
    // casts int to double.
    boolean isMR = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr");
    if (!isMR || !(leftOp instanceof UnionOperator)) {
        leftOp = genInputSelectForUnion(leftOp, leftmap, leftalias, unionoutRR, unionalias);
    }
    if (!isMR || !(rightOp instanceof UnionOperator)) {
        rightOp = genInputSelectForUnion(rightOp, rightmap, rightalias, unionoutRR, unionalias);
    }
    // if one side is already a UnionOperator (possibly behind an identity SelectOperator), merge into it; else create a new one
    if (leftOp instanceof UnionOperator || (leftOp instanceof SelectOperator && leftOp.getParentOperators() != null && !leftOp.getParentOperators().isEmpty() && leftOp.getParentOperators().get(0) instanceof UnionOperator && ((SelectOperator) leftOp).isIdentitySelect())) {
        if (!(leftOp instanceof UnionOperator)) {
            Operator oldChild = leftOp;
            leftOp = (Operator) leftOp.getParentOperators().get(0);
            leftOp.removeChildAndAdoptItsChildren(oldChild);
        }
        // make left a child of right
        List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
        child.add(leftOp);
        rightOp.setChildOperators(child);
        List<Operator<? extends OperatorDesc>> parent = leftOp.getParentOperators();
        parent.add(rightOp);
        UnionDesc uDesc = ((UnionOperator) leftOp).getConf();
        uDesc.setNumInputs(uDesc.getNumInputs() + 1);
        return putOpInsertMap(leftOp, unionoutRR);
    }
    if (rightOp instanceof UnionOperator || (rightOp instanceof SelectOperator && rightOp.getParentOperators() != null && !rightOp.getParentOperators().isEmpty() && rightOp.getParentOperators().get(0) instanceof UnionOperator && ((SelectOperator) rightOp).isIdentitySelect())) {
        if (!(rightOp instanceof UnionOperator)) {
            Operator oldChild = rightOp;
            rightOp = (Operator) rightOp.getParentOperators().get(0);
            rightOp.removeChildAndAdoptItsChildren(oldChild);
        }
        // make right a child of left
        List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
        child.add(rightOp);
        leftOp.setChildOperators(child);
        List<Operator<? extends OperatorDesc>> parent = rightOp.getParentOperators();
        parent.add(leftOp);
        UnionDesc uDesc = ((UnionOperator) rightOp).getConf();
        uDesc.setNumInputs(uDesc.getNumInputs() + 1);
        return putOpInsertMap(rightOp, unionoutRR);
    }
    // Create a new union operator
    Operator<? extends OperatorDesc> unionforward = OperatorFactory.getAndMakeChild(getOpContext(), new UnionDesc(), new RowSchema(unionoutRR.getColumnInfos()));
    // set union operator as child of each of leftOp and rightOp
    List<Operator<? extends OperatorDesc>> child = new ArrayList<Operator<? extends OperatorDesc>>();
    child.add(unionforward);
    rightOp.setChildOperators(child);
    child = new ArrayList<Operator<? extends OperatorDesc>>();
    child.add(unionforward);
    leftOp.setChildOperators(child);
    List<Operator<? extends OperatorDesc>> parent = new ArrayList<Operator<? extends OperatorDesc>>();
    parent.add(leftOp);
    parent.add(rightOp);
    unionforward.setParentOperators(parent);
    // create operator info list to return
    return putOpInsertMap(unionforward, unionoutRR);
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) UnionDesc(org.apache.hadoop.hive.ql.plan.UnionDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Entry(java.util.Map.Entry) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
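The per-column type check in the loop above delegates to FunctionRegistry.getCommonClassForUnionAll, which either returns a widened common type or null when the two sides cannot be reconciled. A minimal standalone sketch (the int/double pair echoes the SEL_1/SEL_3 comment above):

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnionTypeWideningSketch {
    public static void main(String[] args) {
        // Find a common type for one column across both union branches, or fail.
        TypeInfo common = FunctionRegistry.getCommonClassForUnionAll(
                TypeInfoFactory.intTypeInfo, TypeInfoFactory.doubleTypeInfo);
        if (common == null) {
            // genUnionPlan raises a SemanticException here; a plain exception stands in for it.
            throw new IllegalStateException("Schema of both sides of union should match.");
        }
        System.out.println(common.getTypeName());  // expected to widen to double
    }
}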

Aggregations

RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)76 ArrayList (java.util.ArrayList)59 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)57 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)56 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)42 HashMap (java.util.HashMap)39 Operator (org.apache.hadoop.hive.ql.exec.Operator)36 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)32 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)31 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)31 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)30 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)28 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)28 LinkedHashMap (java.util.LinkedHashMap)26 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)25 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)23 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)23 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)22 List (java.util.List)14 SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc)14