
Example 16 with ReduceSinkDesc

use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.

the class ReduceSinkDeDuplicationUtils method extractMergeDirections.

/**
 * Returns merge directions between two RSs for the criteria (ordering, number of reducers,
 * reducer keys, partition keys). Returns null if any of the categories is not mergeable.
 *
 * The value at each index can be -1, 0, or 1:
 * 1. 0 means the two configurations in the category are the same
 * 2. -1 means the configuration of the parent RS is more specific than that of the child RS
 * 3. 1 means the configuration of the child RS is more specific than that of the parent RS
 */
private static int[] extractMergeDirections(HiveConf hiveConf, ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer) throws SemanticException {
    ReduceSinkDesc cConf = cRS.getConf();
    ReduceSinkDesc pConf = pRS.getConf();
    // If there is a PTF between cRS and pRS we cannot ignore the order direction
    final boolean checkStrictEquality = isStrictEqualityNeeded(cRS, pRS);
    Integer moveRSOrderTo = checkOrder(checkStrictEquality, cConf.getOrder(), pConf.getOrder(), cConf.getNullOrder(), pConf.getNullOrder());
    if (moveRSOrderTo == null) {
        return null;
    }
    // if cRS is being used for distinct - the two reduce sinks are incompatible
    if (cConf.getDistinctColumnIndices().size() >= 2) {
        return null;
    }
    if (cConf.getBucketingVersion() != pConf.getBucketingVersion()) {
        return null;
    }
    Integer moveReducerNumTo = checkNumReducer(cConf.getNumReducers(), pConf.getNumReducers());
    if (moveReducerNumTo == null || moveReducerNumTo > 0 && cConf.getNumReducers() < minReducer) {
        return null;
    }
    List<ExprNodeDesc> ckeys = cConf.getKeyCols();
    List<ExprNodeDesc> pkeys = pConf.getKeyCols();
    Integer moveKeyColTo = checkExprs(ckeys, pkeys, cRS, pRS);
    if (moveKeyColTo == null) {
        return null;
    }
    List<ExprNodeDesc> cpars = cConf.getPartitionCols();
    List<ExprNodeDesc> ppars = pConf.getPartitionCols();
    Integer movePartitionColTo = checkExprs(cpars, ppars, cRS, pRS);
    if (movePartitionColTo == null) {
        return null;
    }
    if (canReplaceParentWithChildPartioning(movePartitionColTo, cpars, ppars)) {
        long oldParallelism = estimateReducers(hiveConf, pRS);
        long newParallelism = estimateReducers(hiveConf, cRS);
        if (newParallelism < oldParallelism && newParallelism < minReducer) {
            return null;
        }
    }
    Integer moveNumDistKeyTo = checkNumDistributionKey(cConf.getNumDistributionKeys(), pConf.getNumDistributionKeys());
    return new int[] { moveKeyColTo, movePartitionColTo, moveRSOrderTo, moveReducerNumTo, moveNumDistKeyTo };
}
Also used : ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
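
The five entries of the returned array correspond, in order, to key columns, partition columns, sort order, number of reducers, and number of distribution keys. The helper below only illustrates how a caller could decode those values; describe is a hypothetical name and is not part of the class.

// Illustrative only: decode the direction array returned by extractMergeDirections.
// 0 means both RSs agree, -1 means the parent RS is more specific, 1 means the child RS is.
static String describe(int[] dirs) {
    String[] category = { "keys", "partitions", "order", "reducers", "distKeys" };
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < dirs.length; i++) {
        String side = dirs[i] == 0 ? "same" : (dirs[i] < 0 ? "parent RS more specific" : "child RS more specific");
        sb.append(category[i]).append(": ").append(side).append('\n');
    }
    return sb.toString();
}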

Example 17 with ReduceSinkDesc

use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.

the class ReduceSinkDeDuplicationUtils method strictMerge.

public static boolean strictMerge(ReduceSinkOperator cRS, List<ReduceSinkOperator> pRSs) throws SemanticException {
    ReduceSinkDesc cRSc = cRS.getConf();
    for (ReduceSinkOperator pRS : pRSs) {
        ReduceSinkDesc pRSc = pRS.getConf();
        if (cRSc.getKeyCols().size() != pRSc.getKeyCols().size()) {
            return false;
        }
        if (cRSc.getPartitionCols().size() != pRSc.getPartitionCols().size()) {
            return false;
        }
        Integer moveRSOrderTo = checkOrder(true, cRSc.getOrder(), pRSc.getOrder(), cRSc.getNullOrder(), pRSc.getNullOrder());
        if (moveRSOrderTo == null) {
            return false;
        }
        int cKeySize = cRSc.getKeyCols().size();
        for (int i = 0; i < cKeySize; i++) {
            ExprNodeDesc cExpr = cRSc.getKeyCols().get(i);
            ExprNodeDesc pExpr = pRSc.getKeyCols().get(i);
            if (cExpr instanceof ExprNodeConstantDesc || pExpr instanceof ExprNodeConstantDesc) {
                // If ckeys or pkeys have constant node expressions avoid the merge.
                return false;
            }
            ExprNodeDesc backtrackCExpr = ExprNodeDescUtils.backtrack(cExpr, cRS, pRS);
            if (backtrackCExpr == null || !pExpr.isSame(backtrackCExpr)) {
                return false;
            }
        }
        int cPartSize = cRSc.getPartitionCols().size();
        for (int i = 0; i < cPartSize; i++) {
            ExprNodeDesc cExpr = cRSc.getPartitionCols().get(i);
            ExprNodeDesc pExpr = pRSc.getPartitionCols().get(i);
            if (cExpr instanceof ExprNodeConstantDesc || pExpr instanceof ExprNodeConstantDesc) {
                // If cpartcols or ppartcols have constant node expressions avoid the merge.
                return false;
            }
            ExprNodeDesc backtrackCExpr = ExprNodeDescUtils.backtrack(cExpr, cRS, pRS);
            if (backtrackCExpr == null || !pExpr.isSame(backtrackCExpr)) {
                return false;
            }
        }
        if (cRSc.getBucketCols() != null || pRSc.getBucketCols() != null) {
            if (cRSc.getBucketCols() == null || pRSc.getBucketCols() == null) {
                return false;
            }
            if (cRSc.getBucketCols().size() != pRSc.getBucketCols().size()) {
                return false;
            }
            int cBucketColsSize = cRSc.getBucketCols().size();
            for (int i = 0; i < cBucketColsSize; i++) {
                ExprNodeDesc cExpr = cRSc.getBucketCols().get(i);
                ExprNodeDesc pExpr = pRSc.getBucketCols().get(i);
                if (cExpr instanceof ExprNodeConstantDesc || pExpr instanceof ExprNodeConstantDesc) {
                    // If cbucketcols or pbucketcols have constant node expressions avoid the merge.
                    return false;
                }
                ExprNodeDesc backtrackCExpr = ExprNodeDescUtils.backtrack(cExpr, cRS, pRS);
                if (backtrackCExpr == null || !pExpr.isSame(backtrackCExpr)) {
                    return false;
                }
            }
        }
        // Meets all requirements
        return true;
    }
    // Default
    return false;
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
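
The three column-comparison loops above (key, partition, and bucket columns) run the same per-column test: no constant expressions on either side, and each child expression must backtrack through the plan to an expression identical to the parent's at the same position. A sketch of that shared test, factored into a single hypothetical helper, could read as follows; the actual class keeps the loops inline.

// Hypothetical helper: true only if the two column lists match position by position.
private static boolean columnsMatch(List<ExprNodeDesc> cCols, List<ExprNodeDesc> pCols,
        ReduceSinkOperator cRS, ReduceSinkOperator pRS) throws SemanticException {
    if (cCols.size() != pCols.size()) {
        return false;
    }
    for (int i = 0; i < cCols.size(); i++) {
        ExprNodeDesc cExpr = cCols.get(i);
        ExprNodeDesc pExpr = pCols.get(i);
        // Constant expressions on either side block the merge.
        if (cExpr instanceof ExprNodeConstantDesc || pExpr instanceof ExprNodeConstantDesc) {
            return false;
        }
        // The child expression must map back exactly to the parent's expression.
        ExprNodeDesc backtracked = ExprNodeDescUtils.backtrack(cExpr, cRS, pRS);
        if (backtracked == null || !pExpr.isSame(backtracked)) {
            return false;
        }
    }
    return true;
}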

Example 18 with ReduceSinkDesc

use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.

the class ReduceSinkDeDuplicationUtils method aggressiveDedup.

protected static boolean aggressiveDedup(ReduceSinkOperator cRS, ReduceSinkOperator pRS, ReduceSinkDeduplicateProcCtx dedupCtx) throws SemanticException {
    assert cRS.getNumParent() == 1;
    ReduceSinkDesc cConf = cRS.getConf();
    ReduceSinkDesc pConf = pRS.getConf();
    List<ExprNodeDesc> cKeys = cConf.getKeyCols();
    List<ExprNodeDesc> pKeys = pConf.getKeyCols();
    if (!checkSelectSingleBranchOnly(cRS, pRS)) {
        return false;
    }
    // If child keys are null or empty, we bail out
    if (cKeys == null || cKeys.isEmpty()) {
        return false;
    }
    // If parent keys are null or empty, we bail out
    if (pKeys == null || pKeys.isEmpty()) {
        return false;
    }
    // Backtrack key columns of cRS to pRS
    // If we cannot backtrack any of the columns, bail out
    List<ExprNodeDesc> cKeysInParentRS = ExprNodeDescUtils.backtrack(cKeys, cRS, pRS);
    for (int i = 0; i < cKeysInParentRS.size(); i++) {
        ExprNodeDesc pexpr = cKeysInParentRS.get(i);
        if (pexpr == null) {
            // We cannot backtrack the expression, we bail out
            return false;
        }
    }
    cRS.getConf().setKeyCols(cKeysInParentRS);
    // Backtrack partition columns of cRS to pRS
    // If we cannot backtrack any of the columns, bail out
    List<ExprNodeDesc> cPartitionInParentRS = ExprNodeDescUtils.backtrack(cConf.getPartitionCols(), cRS, pRS);
    for (int i = 0; i < cPartitionInParentRS.size(); i++) {
        ExprNodeDesc pexpr = cPartitionInParentRS.get(i);
        if (pexpr == null) {
            // We cannot backtrack the expression, we bail out
            return false;
        }
    }
    cRS.getConf().setPartitionCols(cPartitionInParentRS);
    // Backtrack value columns of cRS to pRS
    // If we cannot backtrack any of the columns, bail out
    List<ExprNodeDesc> cValueInParentRS = ExprNodeDescUtils.backtrack(cConf.getValueCols(), cRS, pRS);
    for (int i = 0; i < cValueInParentRS.size(); i++) {
        ExprNodeDesc pexpr = cValueInParentRS.get(i);
        if (pexpr == null) {
            // We cannot backtrack the expression, we bail out
            return false;
        }
    }
    cRS.getConf().setValueCols(cValueInParentRS);
    // If we cannot backtrack any of the columns, bail out
    if (cConf.getBucketCols() != null) {
        List<ExprNodeDesc> cBucketInParentRS = ExprNodeDescUtils.backtrack(cConf.getBucketCols(), cRS, pRS);
        for (int i = 0; i < cBucketInParentRS.size(); i++) {
            ExprNodeDesc pexpr = cBucketInParentRS.get(i);
            if (pexpr == null) {
                // We cannot backtrack the expression, we bail out
                return false;
            }
        }
        cRS.getConf().setBucketCols(cBucketInParentRS);
    }
    // Update column expression map
    for (Entry<String, ExprNodeDesc> e : cRS.getColumnExprMap().entrySet()) {
        e.setValue(ExprNodeDescUtils.backtrack(e.getValue(), cRS, pRS));
    }
    // Replace pRS with cRS and remove operator sequence from pRS to cRS
    // Recall that the sequence must be pRS-SEL*-cRS
    Operator<? extends OperatorDesc> parent = cRS.getParentOperators().get(0);
    while (parent != pRS) {
        dedupCtx.addRemovedOperator(parent);
        parent = parent.getParentOperators().get(0);
    }
    dedupCtx.addRemovedOperator(pRS);
    cRS.getParentOperators().clear();
    for (Operator<? extends OperatorDesc> op : pRS.getParentOperators()) {
        op.replaceChild(pRS, cRS);
        cRS.getParentOperators().add(op);
    }
    pRS.getParentOperators().clear();
    pRS.getChildOperators().clear();
    return true;
}
Also used : ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
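
The tail of the method rewires the operator DAG: cRS keeps its backtracked key, partition, value, and bucket columns, while pRS and any SEL operators between them are detached and recorded as removed. A stand-alone sketch of that splicing step is shown below; splice is a hypothetical name and mirrors, rather than replaces, the inline code above.

// Illustrative only: make 'keep' take the place of 'remove' in the operator DAG,
// marking every operator on the remove-...-keep chain as removed.
static void splice(Operator<? extends OperatorDesc> keep, Operator<? extends OperatorDesc> remove,
        ReduceSinkDeduplicateProcCtx ctx) {
    Operator<? extends OperatorDesc> cur = keep.getParentOperators().get(0);
    while (cur != remove) {
        // Intermediate operators (the SELs in pRS-SEL*-cRS) are dropped from the plan.
        ctx.addRemovedOperator(cur);
        cur = cur.getParentOperators().get(0);
    }
    ctx.addRemovedOperator(remove);
    keep.getParentOperators().clear();
    for (Operator<? extends OperatorDesc> grandParent : remove.getParentOperators()) {
        grandParent.replaceChild(remove, keep);
        keep.getParentOperators().add(grandParent);
    }
    remove.getParentOperators().clear();
    remove.getChildOperators().clear();
}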

Example 19 with ReduceSinkDesc

use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.

the class SemanticAnalyzer method genReduceSinkPlan.

@SuppressWarnings("nls")
private Operator genReduceSinkPlan(Operator<?> input, List<ExprNodeDesc> partitionCols, List<ExprNodeDesc> sortCols, String sortOrder, String nullOrder, int numReducers, AcidUtils.Operation acidOp, boolean pullConstants, boolean isCompaction) throws SemanticException {
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    Operator dummy = Operator.createDummy();
    dummy.setParentOperators(Arrays.asList(input));
    List<ExprNodeDesc> newSortCols = new ArrayList<ExprNodeDesc>();
    StringBuilder newSortOrder = new StringBuilder();
    StringBuilder newNullOrder = new StringBuilder();
    List<ExprNodeDesc> sortColsBack = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < sortCols.size(); i++) {
        ExprNodeDesc sortCol = sortCols.get(i);
        // we are pulling constants but this is not a constant
        if (!pullConstants || !(sortCol instanceof ExprNodeConstantDesc)) {
            newSortCols.add(sortCol);
            newSortOrder.append(sortOrder.charAt(i));
            newNullOrder.append(nullOrder.charAt(i));
            sortColsBack.add(ExprNodeDescUtils.backtrack(sortCol, dummy, input));
        }
    }
    // For the generation of the values expression just get the inputs
    // signature and generate field expressions for those
    RowResolver rsRR = new RowResolver();
    List<String> outputColumns = new ArrayList<String>();
    List<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
    List<ExprNodeDesc> valueColsBack = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<ExprNodeDesc> constantCols = new ArrayList<ExprNodeDesc>();
    List<ColumnInfo> columnInfos = inputRR.getColumnInfos();
    int[] index = new int[columnInfos.size()];
    for (int i = 0; i < index.length; i++) {
        ColumnInfo colInfo = columnInfos.get(i);
        String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
        ExprNodeColumnDesc value = new ExprNodeColumnDesc(colInfo);
        // backtrack can be null when input is script operator
        ExprNodeDesc valueBack = ExprNodeDescUtils.backtrack(value, dummy, input);
        if (pullConstants && valueBack instanceof ExprNodeConstantDesc) {
            // ignore, it will be generated by SEL op
            index[i] = Integer.MAX_VALUE;
            constantCols.add(valueBack);
            continue;
        }
        int kindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, sortColsBack);
        if (kindex >= 0) {
            index[i] = kindex;
            ColumnInfo newColInfo = new ColumnInfo(colInfo);
            newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
            newColInfo.setTabAlias(nm[0]);
            rsRR.put(nm[0], nm[1], newColInfo);
            if (nm2 != null) {
                rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
            }
            continue;
        }
        int vindex = valueBack == null ? -1 : ExprNodeDescUtils.indexOf(valueBack, valueColsBack);
        if (vindex >= 0) {
            index[i] = -vindex - 1;
            continue;
        }
        index[i] = -valueCols.size() - 1;
        String outputColName = getColumnInternalName(valueCols.size());
        valueCols.add(value);
        valueColsBack.add(valueBack);
        ColumnInfo newColInfo = new ColumnInfo(colInfo);
        newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
        newColInfo.setTabAlias(nm[0]);
        rsRR.put(nm[0], nm[1], newColInfo);
        if (nm2 != null) {
            rsRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
        }
        outputColumns.add(outputColName);
    }
    dummy.setParentOperators(null);
    ReduceSinkDesc rsdesc = PlanUtils.getReduceSinkDesc(newSortCols, valueCols, outputColumns, false, -1, partitionCols, newSortOrder.toString(), newNullOrder.toString(), defaultNullOrder, numReducers, acidOp, isCompaction);
    Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(rsdesc, new RowSchema(rsRR.getColumnInfos()), input), rsRR);
    List<String> keyColNames = rsdesc.getOutputKeyColumnNames();
    for (int i = 0; i < keyColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), newSortCols.get(i));
    }
    List<String> valueColNames = rsdesc.getOutputValueColumnNames();
    for (int i = 0; i < valueColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.VALUE + "." + valueColNames.get(i), valueCols.get(i));
    }
    interim.setColumnExprMap(colExprMap);
    RowResolver selectRR = new RowResolver();
    List<ExprNodeDesc> selCols = new ArrayList<ExprNodeDesc>();
    List<String> selOutputCols = new ArrayList<String>();
    Map<String, ExprNodeDesc> selColExprMap = new HashMap<String, ExprNodeDesc>();
    Iterator<ExprNodeDesc> constants = constantCols.iterator();
    for (int i = 0; i < index.length; i++) {
        ColumnInfo prev = columnInfos.get(i);
        String[] nm = inputRR.reverseLookup(prev.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(prev.getInternalName());
        ColumnInfo info = new ColumnInfo(prev);
        ExprNodeDesc desc;
        if (index[i] == Integer.MAX_VALUE) {
            desc = constants.next();
        } else {
            String field;
            if (index[i] >= 0) {
                field = Utilities.ReduceField.KEY + "." + keyColNames.get(index[i]);
            } else {
                field = Utilities.ReduceField.VALUE + "." + valueColNames.get(-index[i] - 1);
            }
            desc = new ExprNodeColumnDesc(info.getType(), field, info.getTabAlias(), info.getIsVirtualCol());
        }
        selCols.add(desc);
        String internalName = getColumnInternalName(i);
        info.setInternalName(internalName);
        selectRR.put(nm[0], nm[1], info);
        if (nm2 != null) {
            selectRR.addMappingOnly(nm2[0], nm2[1], info);
        }
        selOutputCols.add(internalName);
        selColExprMap.put(internalName, desc);
    }
    SelectDesc select = new SelectDesc(selCols, selOutputCols);
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(select, new RowSchema(selectRR.getColumnInfos()), interim), selectRR);
    output.setColumnExprMap(selColExprMap);
    return output;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
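
The index[] array drives the SEL operator placed after the reduce sink: Integer.MAX_VALUE marks a pulled constant that the SEL regenerates, a non-negative entry k points at the k-th key column, and a negative entry v points at value column -v - 1. The helper below (a hypothetical name, assuming the keyColNames and valueColNames lists built above) only illustrates how one entry is resolved to a field name.

// Illustrative only: resolve one index[] entry to the field the SEL should read.
static String resolveField(int encoded, List<String> keyColNames, List<String> valueColNames) {
    if (encoded == Integer.MAX_VALUE) {
        return "<pulled constant, re-created by the SEL>";
    }
    if (encoded >= 0) {
        // Key column: read KEY.<key column name>.
        return Utilities.ReduceField.KEY + "." + keyColNames.get(encoded);
    }
    // Value column: read VALUE.<value column name>.
    return Utilities.ReduceField.VALUE + "." + valueColNames.get(-encoded - 1);
}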

Example 20 with ReduceSinkDesc

use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.

the class SemanticAnalyzer method genMaterializedViewDataOrgPlan.

private Operator genMaterializedViewDataOrgPlan(List<ColumnInfo> sortColInfos, List<ColumnInfo> distributeColInfos, RowResolver inputRR, Operator input) {
    // In this case, we will introduce an RS and, immediately after it, a SEL that restores
    // the row schema to what follow-up operations expect
    Set<String> keys = sortColInfos.stream().map(ColumnInfo::getInternalName).collect(Collectors.toSet());
    Set<String> distributeKeys = distributeColInfos.stream().map(ColumnInfo::getInternalName).collect(Collectors.toSet());
    List<ExprNodeDesc> keyCols = new ArrayList<>();
    List<String> keyColNames = new ArrayList<>();
    StringBuilder order = new StringBuilder();
    StringBuilder nullOrder = new StringBuilder();
    List<ExprNodeDesc> valCols = new ArrayList<>();
    List<String> valColNames = new ArrayList<>();
    List<ExprNodeDesc> partCols = new ArrayList<>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
    Map<String, String> nameMapping = new HashMap<>();
    // map _col0 to KEY._col0, etc
    for (ColumnInfo ci : inputRR.getRowSchema().getSignature()) {
        ExprNodeColumnDesc e = new ExprNodeColumnDesc(ci);
        String columnName = ci.getInternalName();
        if (keys.contains(columnName)) {
            // key (sort column)
            keyColNames.add(columnName);
            keyCols.add(e);
            colExprMap.put(Utilities.ReduceField.KEY + "." + columnName, e);
            nameMapping.put(columnName, Utilities.ReduceField.KEY + "." + columnName);
            order.append("+");
            nullOrder.append("a");
        } else {
            // value
            valColNames.add(columnName);
            valCols.add(e);
            colExprMap.put(Utilities.ReduceField.VALUE + "." + columnName, e);
            nameMapping.put(columnName, Utilities.ReduceField.VALUE + "." + columnName);
        }
        if (distributeKeys.contains(columnName)) {
            // distribute column
            partCols.add(e.clone());
        }
    }
    // Create Key/Value TableDesc. When the operator plan is split into MR tasks,
    // the reduce operator will initialize Extract operator with information
    // from Key and Value TableDesc
    List<FieldSchema> fields = PlanUtils.getFieldSchemasFromColumnList(keyCols, keyColNames, 0, "");
    TableDesc keyTable = PlanUtils.getReduceKeyTableDesc(fields, order.toString(), nullOrder.toString());
    List<FieldSchema> valFields = PlanUtils.getFieldSchemasFromColumnList(valCols, valColNames, 0, "");
    TableDesc valueTable = PlanUtils.getReduceValueTableDesc(valFields);
    List<List<Integer>> distinctColumnIndices = new ArrayList<>();
    // Number of reducers is set to default (-1)
    ReduceSinkDesc rsConf = new ReduceSinkDesc(keyCols, keyCols.size(), valCols, keyColNames, distinctColumnIndices, valColNames, -1, partCols, -1, keyTable, valueTable, Operation.NOT_ACID);
    RowResolver rsRR = new RowResolver();
    List<ColumnInfo> rsSignature = new ArrayList<>();
    for (int index = 0; index < input.getSchema().getSignature().size(); index++) {
        ColumnInfo colInfo = new ColumnInfo(input.getSchema().getSignature().get(index));
        String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
        colInfo.setInternalName(nameMapping.get(colInfo.getInternalName()));
        rsSignature.add(colInfo);
        rsRR.put(nm[0], nm[1], colInfo);
        if (nm2 != null) {
            rsRR.addMappingOnly(nm2[0], nm2[1], colInfo);
        }
    }
    Operator<?> result = putOpInsertMap(OperatorFactory.getAndMakeChild(rsConf, new RowSchema(rsSignature), input), rsRR);
    result.setColumnExprMap(colExprMap);
    // Create SEL operator
    RowResolver selRR = new RowResolver();
    List<ColumnInfo> selSignature = new ArrayList<>();
    List<ExprNodeDesc> columnExprs = new ArrayList<>();
    List<String> colNames = new ArrayList<>();
    Map<String, ExprNodeDesc> selColExprMap = new HashMap<>();
    for (int index = 0; index < input.getSchema().getSignature().size(); index++) {
        ColumnInfo colInfo = new ColumnInfo(input.getSchema().getSignature().get(index));
        String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
        selSignature.add(colInfo);
        selRR.put(nm[0], nm[1], colInfo);
        if (nm2 != null) {
            selRR.addMappingOnly(nm2[0], nm2[1], colInfo);
        }
        String colName = colInfo.getInternalName();
        ExprNodeDesc exprNodeDesc;
        if (keys.contains(colName)) {
            exprNodeDesc = new ExprNodeColumnDesc(colInfo.getType(), ReduceField.KEY.toString() + "." + colName, null, false);
            columnExprs.add(exprNodeDesc);
        } else {
            exprNodeDesc = new ExprNodeColumnDesc(colInfo.getType(), ReduceField.VALUE.toString() + "." + colName, null, false);
            columnExprs.add(exprNodeDesc);
        }
        colNames.add(colName);
        selColExprMap.put(colName, exprNodeDesc);
    }
    SelectDesc selConf = new SelectDesc(columnExprs, colNames);
    result = putOpInsertMap(OperatorFactory.getAndMakeChild(selConf, new RowSchema(selSignature), result), selRR);
    result.setColumnExprMap(selColExprMap);
    return result;
}
Also used : LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) PreInsertTableDesc(org.apache.hadoop.hive.ql.ddl.table.misc.preinsert.PreInsertTableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)
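
The first loop classifies each column of the input row schema: sort columns become reduce keys (always ascending, nulls first, hence the "+" and "a" appended per key), every other column becomes a value, and distribution columns are collected independently so they may overlap with either group. A compact restatement of that classification is sketched below; classify is a hypothetical helper, not part of SemanticAnalyzer.

// Illustrative only: classification of one input column, mirroring the loop above.
static void classify(ColumnInfo ci, Set<String> keys, Set<String> distributeKeys,
        List<ExprNodeDesc> keyCols, List<ExprNodeDesc> valCols, List<ExprNodeDesc> partCols,
        StringBuilder order, StringBuilder nullOrder) {
    ExprNodeColumnDesc e = new ExprNodeColumnDesc(ci);
    if (keys.contains(ci.getInternalName())) {
        keyCols.add(e);
        order.append("+");      // ascending sort
        nullOrder.append("a");  // nulls first
    } else {
        valCols.add(e);
    }
    if (distributeKeys.contains(ci.getInternalName())) {
        partCols.add(e.clone()); // distribution column, may also be a key or value
    }
}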

Aggregations

ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) 50
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) 31
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc) 31
ArrayList (java.util.ArrayList) 29
Operator (org.apache.hadoop.hive.ql.exec.Operator) 21
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator) 20
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema) 19
HashMap (java.util.HashMap) 18
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo) 17
LinkedHashMap (java.util.LinkedHashMap) 16
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator) 16
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) 16
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator) 14
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator) 14
SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc) 13
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) 12
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator) 11
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator) 11
LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator) 11
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator) 11