Example 1 with ReduceSinkDesc

use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.

the class SemanticAnalyzer method genGroupByPlanGroupByOperator1.

/**
   * Generate the GroupByOperator for the Query Block (parseInfo.getXXX(dest)).
   * The new GroupByOperator will be a child of the reduceSinkOperatorInfo.
   *
   * @param parseInfo
   * @param dest
   * @param reduceSinkOperatorInfo
   * @param mode
   *          The mode of the aggregation (MERGEPARTIAL, PARTIAL2)
   * @param genericUDAFEvaluators
   *          The mapping from Aggregation StringTree to the
   *          genericUDAFEvaluator.
   * @param groupingSets
   *          list of grouping sets
   * @param groupingSetsPresent
   *          whether grouping sets are present in this query
   * @param groupingSetsNeedAdditionalMRJob
   *          whether an additional MR job is needed to process the grouping sets
   * @return the new GroupByOperator
   */
@SuppressWarnings("nls")
private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode, Map<String, GenericUDAFEvaluator> genericUDAFEvaluators, List<Integer> groupingSets, boolean groupingSetsPresent, boolean groupingSetsNeedAdditionalMRJob) throws SemanticException {
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    RowResolver groupByInputRowResolver = opParseCtx.get(reduceSinkOperatorInfo).getRowResolver();
    RowResolver groupByOutputRowResolver = new RowResolver();
    groupByOutputRowResolver.setIsExprResolver(true);
    ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int i = 0; i < grpByExprs.size(); ++i) {
        ASTNode grpbyExpr = grpByExprs.get(i);
        ColumnInfo exprInfo = groupByInputRowResolver.getExpression(grpbyExpr);
        if (exprInfo == null) {
            throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(grpbyExpr));
        }
        groupByKeys.add(new ExprNodeColumnDesc(exprInfo));
        String field = getColumnInternalName(i);
        outputColumnNames.add(field);
        ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false);
        groupByOutputRowResolver.putExpression(grpbyExpr, oColInfo);
        addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo, groupByOutputRowResolver);
        colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
    }
    // This is only needed if a new grouping set key is being created
    int groupingSetsPosition = -1;
    // For grouping sets, add a dummy grouping key
    if (groupingSetsPresent) {
        groupingSetsPosition = groupByKeys.size();
        // This function is called for GroupBy2 to add grouping id as part of the groupby keys
        if (!groupingSetsNeedAdditionalMRJob) {
            addGroupingSetKey(groupByKeys, groupByInputRowResolver, groupByOutputRowResolver, outputColumnNames, colExprMap);
        } else {
            // The grouping set has not yet been processed. Create a new grouping key
            // Consider the query: select a,b, count(1) from T group by a,b with cube;
            // where it is being executed in 2 map-reduce jobs
            // The plan for 1st MR is TableScan -> GroupBy1 -> ReduceSink -> GroupBy2 -> FileSink
            // GroupBy1/ReduceSink worked as if grouping sets were not present
            // This function is called for GroupBy2 to create new rows for grouping sets
            // For each input row (a,b), 4 rows are created for the example above:
            // (a,b), (a,null), (null, b), (null, null)
            createNewGroupingKey(groupByKeys, outputColumnNames, groupByOutputRowResolver, colExprMap);
        }
    }
    HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
    // get the last colName for the reduce KEY;
    // it is the column name corresponding to the distinct aggregation, if any
    String lastKeyColName = null;
    List<ExprNodeDesc> reduceValues = null;
    if (reduceSinkOperatorInfo.getConf() instanceof ReduceSinkDesc) {
        List<String> inputKeyCols = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getOutputKeyColumnNames();
        if (inputKeyCols.size() > 0) {
            lastKeyColName = inputKeyCols.get(inputKeyCols.size() - 1);
        }
        reduceValues = ((ReduceSinkDesc) reduceSinkOperatorInfo.getConf()).getValueCols();
    }
    int numDistinctUDFs = 0;
    boolean containsDistinctAggr = false;
    for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
        ASTNode value = entry.getValue();
        String aggName = unescapeIdentifier(value.getChild(0).getText());
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        boolean isDistinct = (value.getType() == HiveParser.TOK_FUNCTIONDI);
        containsDistinctAggr = containsDistinctAggr || isDistinct;
        // If the function is distinct, partial aggregation has not been done on
        // the client side, so always look for the parameters: d+e
        if (isDistinct) {
            // 0 is the function name
            for (int i = 1; i < value.getChildCount(); i++) {
                ASTNode paraExpr = (ASTNode) value.getChild(i);
                ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(paraExpr);
                if (paraExprInfo == null) {
                    throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(paraExpr));
                }
                String paraExpression = paraExprInfo.getInternalName();
                assert (paraExpression != null);
                if (isDistinct && lastKeyColName != null) {
                    // if aggr is distinct, the parameter name is constructed as
                    // KEY.lastKeyColName:<tag>._colx
                    paraExpression = Utilities.ReduceField.KEY.name() + "." + lastKeyColName + ":" + numDistinctUDFs + "." + getColumnInternalName(i - 1);
                }
                ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol());
                ExprNodeDesc reduceValue = isConstantParameterInAggregationParameters(paraExprInfo.getInternalName(), reduceValues);
                if (reduceValue != null) {
                    // this parameter is a constant
                    expr = reduceValue;
                }
                aggParameters.add(expr);
            }
        } else {
            ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value);
            if (paraExprInfo == null) {
                throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(value));
            }
            String paraExpression = paraExprInfo.getInternalName();
            assert (paraExpression != null);
            aggParameters.add(new ExprNodeColumnDesc(paraExprInfo.getType(), paraExpression, paraExprInfo.getTabAlias(), paraExprInfo.getIsVirtualCol()));
        }
        if (isDistinct) {
            numDistinctUDFs++;
        }
        Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
        GenericUDAFEvaluator genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
        assert (genericUDAFEvaluator != null);
        GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters);
        aggregations.add(new AggregationDesc(aggName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, (mode != GroupByDesc.Mode.FINAL && isDistinct), amode));
        String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1);
        outputColumnNames.add(field);
        groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, udaf.returnType, "", false));
    }
    float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
    float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
    // Nothing special needs to be done for grouping sets if
    // this is the final group by operator, and multiple rows corresponding to the
    // grouping sets have been generated upstream.
    // However, if an additional MR job has been created to handle grouping sets,
    // additional rows corresponding to grouping sets need to be created here.
    Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, groupByMemoryUsage, memoryThreshold, groupingSets, groupingSetsPresent && groupingSetsNeedAdditionalMRJob, groupingSetsPosition, containsDistinctAggr), new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo), groupByOutputRowResolver);
    op.setColumnExprMap(colExprMap);
    return op;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap)
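
The comments above sketch how GroupBy2 expands each input row into one row per grouping set. As a self-contained illustration of that expansion (a minimal sketch in plain Java, not Hive's actual implementation, which works on internal column representations), a cube over two keys produces four masked copies of each row:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class CubeExpansionSketch {

    // For "GROUP BY a, b WITH CUBE" there are 2^2 = 4 grouping sets; each is a
    // bitmask telling which of the two keys is kept (1) or nulled out (0).
    static final int[] CUBE_MASKS = { 0b11, 0b10, 0b01, 0b00 };

    static List<Object[]> expand(Object[] keys) {
        List<Object[]> out = new ArrayList<>();
        for (int mask : CUBE_MASKS) {
            Object[] copy = new Object[keys.length];
            for (int i = 0; i < keys.length; i++) {
                // Keep the key if its bit is set, otherwise substitute null,
                // mirroring the (a,b), (a,null), (null,b), (null,null) rows.
                copy[i] = ((mask >> (keys.length - 1 - i)) & 1) == 1 ? keys[i] : null;
            }
            out.add(copy);
        }
        return out;
    }

    public static void main(String[] args) {
        for (Object[] row : expand(new Object[] { "a1", "b1" })) {
            System.out.println(Arrays.toString(row));
        }
    }
}

In the real plan, the grouping-set ID is also appended as an extra key (the createNewGroupingKey call above), so downstream aggregation can tell a nulled-out key apart from a key that was genuinely NULL.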

Example 2 with ReduceSinkDesc

use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.

the class HiveOpConverter method genReduceSink.

@SuppressWarnings({ "rawtypes", "unchecked" })
private static ReduceSinkOperator genReduceSink(Operator<?> input, String tableAlias, ExprNodeDesc[] keys, int tag, ArrayList<ExprNodeDesc> partitionCols, String order, String nullOrder, int numReducers, Operation acidOperation, HiveConf hiveConf) throws SemanticException {
    // dummy for backtracking
    Operator dummy = Operator.createDummy();
    dummy.setParentOperators(Arrays.asList(input));
    ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> reduceKeysBack = new ArrayList<ExprNodeDesc>();
    // Compute join keys and store in reduceKeys
    for (ExprNodeDesc key : keys) {
        reduceKeys.add(key);
        reduceKeysBack.add(ExprNodeDescUtils.backtrack(key, dummy, input));
    }
    // Walk over the input schema and copy in the output
    ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> reduceValuesBack = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<ColumnInfo> inputColumns = input.getSchema().getSignature();
    ArrayList<ColumnInfo> outputColumns = new ArrayList<ColumnInfo>();
    List<String> outputColumnNames = new ArrayList<String>();
    int[] index = new int[inputColumns.size()];
    for (int i = 0; i < inputColumns.size(); i++) {
        ColumnInfo colInfo = inputColumns.get(i);
        String outputColName = colInfo.getInternalName();
        ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo);
        // backtrack can be null when the input is a script operator
        ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, input);
        int kindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
        if (kindex >= 0) {
            ColumnInfo newColInfo = new ColumnInfo(colInfo);
            newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
            newColInfo.setAlias(outputColName);
            newColInfo.setTabAlias(tableAlias);
            outputColumns.add(newColInfo);
            index[i] = kindex;
            continue;
        }
        int vindex = exprBack == null ? -1 : ExprNodeDescUtils.indexOf(exprBack, reduceValuesBack);
        if (vindex >= 0) {
            index[i] = -vindex - 1;
            continue;
        }
        index[i] = -reduceValues.size() - 1;
        reduceValues.add(expr);
        reduceValuesBack.add(exprBack);
        ColumnInfo newColInfo = new ColumnInfo(colInfo);
        newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
        newColInfo.setAlias(outputColName);
        newColInfo.setTabAlias(tableAlias);
        outputColumns.add(newColInfo);
        outputColumnNames.add(outputColName);
    }
    dummy.setParentOperators(null);
    // Use only 1 reducer if no reduce keys
    if (reduceKeys.size() == 0) {
        numReducers = 1;
        // Cartesian product is not supported in strict mode
        String error = StrictChecks.checkCartesian(hiveConf);
        if (error != null) {
            throw new SemanticException(error);
        }
    }
    ReduceSinkDesc rsDesc;
    if (order.isEmpty()) {
        rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag, reduceKeys.size(), numReducers, acidOperation);
    } else {
        rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, false, tag, partitionCols, order, nullOrder, numReducers, acidOperation);
    }
    ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(outputColumns), input);
    List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
    for (int i = 0; i < keyColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), reduceKeys.get(i));
    }
    List<String> valColNames = rsDesc.getOutputValueColumnNames();
    for (int i = 0; i < valColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.VALUE + "." + valColNames.get(i), reduceValues.get(i));
    }
    rsOp.setValueIndex(index);
    rsOp.setColumnExprMap(colExprMap);
    rsOp.setInputAliases(input.getSchema().getTableNames().toArray(new String[input.getSchema().getTableNames().size()]));
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + rsOp + " with row schema: [" + rsOp.getSchema() + "]");
    }
    return rsOp;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
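
The index array built above uses a compact convention: a non-negative entry k means input column i is forwarded as key column reducesinkkey k, while a negative entry encodes a value-side position v as -v - 1. A minimal decoder for that convention (illustrative only; decode is not a Hive API):

public class ValueIndexSketch {
    // Decodes one entry of rsOp.setValueIndex(index): a non-negative entry k
    // means the input column is forwarded as KEY.reducesinkkey<k>; a negative
    // entry encodes a value-side position v as -v - 1.
    static String decode(int entry) {
        return entry >= 0 ? "KEY.reducesinkkey" + entry
                          : "VALUE position " + (-entry - 1);
    }

    public static void main(String[] args) {
        System.out.println(decode(0));   // KEY.reducesinkkey0
        System.out.println(decode(-1));  // VALUE position 0
        System.out.println(decode(-3));  // VALUE position 2
    }
}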

Example 3 with ReduceSinkDesc

use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.

the class SemanticAnalyzer method genJoinReduceSinkChild.

@SuppressWarnings("nls")
private Operator genJoinReduceSinkChild(QB qb, ExprNodeDesc[] joinKeys, Operator<?> child, String[] srcs, int tag) throws SemanticException {
    // dummy for backtracking
    Operator dummy = Operator.createDummy();
    dummy.setParentOperators(Arrays.asList(child));
    RowResolver inputRR = opParseCtx.get(child).getRowResolver();
    RowResolver outputRR = new RowResolver();
    ArrayList<String> outputColumns = new ArrayList<String>();
    ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> reduceKeysBack = new ArrayList<ExprNodeDesc>();
    // Compute join keys and store in reduceKeys
    for (ExprNodeDesc joinKey : joinKeys) {
        reduceKeys.add(joinKey);
        reduceKeysBack.add(ExprNodeDescUtils.backtrack(joinKey, dummy, child));
    }
    // Walk over the input row resolver and copy in the output
    ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
    ArrayList<ExprNodeDesc> reduceValuesBack = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<ColumnInfo> columns = inputRR.getColumnInfos();
    int[] index = new int[columns.size()];
    for (int i = 0; i < columns.size(); i++) {
        ColumnInfo colInfo = columns.get(i);
        String[] nm = inputRR.reverseLookup(colInfo.getInternalName());
        String[] nm2 = inputRR.getAlternateMappings(colInfo.getInternalName());
        ExprNodeDesc expr = new ExprNodeColumnDesc(colInfo);
        // backtrack can be null when the input is a script operator
        ExprNodeDesc exprBack = ExprNodeDescUtils.backtrack(expr, dummy, child);
        int kindex;
        if (exprBack == null) {
            kindex = -1;
        } else if (ExprNodeDescUtils.isConstant(exprBack)) {
            kindex = reduceKeysBack.indexOf(exprBack);
        } else {
            kindex = ExprNodeDescUtils.indexOf(exprBack, reduceKeysBack);
        }
        if (kindex >= 0) {
            ColumnInfo newColInfo = new ColumnInfo(colInfo);
            newColInfo.setInternalName(Utilities.ReduceField.KEY + ".reducesinkkey" + kindex);
            newColInfo.setTabAlias(nm[0]);
            outputRR.put(nm[0], nm[1], newColInfo);
            if (nm2 != null) {
                outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
            }
            index[i] = kindex;
            continue;
        }
        index[i] = -reduceValues.size() - 1;
        String outputColName = getColumnInternalName(reduceValues.size());
        reduceValues.add(expr);
        reduceValuesBack.add(exprBack);
        ColumnInfo newColInfo = new ColumnInfo(colInfo);
        newColInfo.setInternalName(Utilities.ReduceField.VALUE + "." + outputColName);
        newColInfo.setTabAlias(nm[0]);
        outputRR.put(nm[0], nm[1], newColInfo);
        if (nm2 != null) {
            outputRR.addMappingOnly(nm2[0], nm2[1], newColInfo);
        }
        outputColumns.add(outputColName);
    }
    dummy.setParentOperators(null);
    int numReds = -1;
    // Use only 1 reducer in case of cartesian product
    if (reduceKeys.size() == 0) {
        numReds = 1;
        String error = StrictChecks.checkCartesian(conf);
        if (error != null) {
            throw new SemanticException(error);
        }
    }
    ReduceSinkDesc rsDesc = PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumns, false, tag, reduceKeys.size(), numReds, AcidUtils.Operation.NOT_ACID);
    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(OperatorFactory.getAndMakeChild(rsDesc, new RowSchema(outputRR.getColumnInfos()), child), outputRR);
    List<String> keyColNames = rsDesc.getOutputKeyColumnNames();
    for (int i = 0; i < keyColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.KEY + "." + keyColNames.get(i), reduceKeys.get(i));
    }
    List<String> valColNames = rsDesc.getOutputValueColumnNames();
    for (int i = 0; i < valColNames.size(); i++) {
        colExprMap.put(Utilities.ReduceField.VALUE + "." + valColNames.get(i), reduceValues.get(i));
    }
    rsOp.setValueIndex(index);
    rsOp.setColumnExprMap(colExprMap);
    rsOp.setInputAliases(srcs);
    return rsOp;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)
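
genJoinReduceSinkChild is invoked once per join input, and the tag argument is what lets rows from different inputs meet in the same reducer without being confused. A toy model of that tagging (plain Java records, not Hive's runtime types) groups tagged rows by join key the way the shuffle does:

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class TaggedShuffleSketch {

    // Each join branch emits (tag, key, value); the shuffle groups by key, and
    // the join operator uses the tag to tell left rows from right rows.
    record TaggedRow(int tag, String key, String value) {}

    static Map<String, List<TaggedRow>> shuffle(List<TaggedRow> rows) {
        Map<String, List<TaggedRow>> byKey = new LinkedHashMap<>();
        for (TaggedRow r : rows) {
            byKey.computeIfAbsent(r.key(), k -> new ArrayList<>()).add(r);
        }
        return byKey;
    }

    public static void main(String[] args) {
        List<TaggedRow> rows = List.of(
            new TaggedRow(0, "k1", "left-row"),   // from the first branch (tag 0)
            new TaggedRow(1, "k1", "right-row")); // from the second branch (tag 1)
        // Both rows land in the same group, distinguishable by tag.
        System.out.println(shuffle(rows));
    }
}

With an empty key list (a cartesian product) every row would land in a single group, which is why the method pins numReds to 1 and lets StrictChecks.checkCartesian reject the plan in strict mode.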

Example 4 with ReduceSinkDesc

use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.

the class ReduceSinkDeDuplicationUtils method extractMergeDirections.

/**
 * Returns merge directions between two RSs for each criterion (ordering, number of reducers,
 * reducer keys, partition keys). Returns null if any of the categories is not mergeable.
 *
 * The value at each index can be -1, 0, or 1:
 * 1. 0 means the two configurations in the category are the same
 * 2. -1 means the configuration of the parent RS is more specific than that of the child RS
 * 3. 1 means the configuration of the child RS is more specific than that of the parent RS
 */
private static int[] extractMergeDirections(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minReducer) throws SemanticException {
    ReduceSinkDesc cConf = cRS.getConf();
    ReduceSinkDesc pConf = pRS.getConf();
    // If there is a PTF between cRS and pRS we cannot ignore the order direction
    final boolean checkStrictEquality = isStrictEqualityNeeded(cRS, pRS);
    Integer moveRSOrderTo = checkOrder(checkStrictEquality, cConf.getOrder(), pConf.getOrder(), cConf.getNullOrder(), pConf.getNullOrder());
    if (moveRSOrderTo == null) {
        return null;
    }
    // if cRS is being used for distinct - the two reduce sinks are incompatible
    if (cConf.getDistinctColumnIndices().size() >= 2) {
        return null;
    }
    Integer moveReducerNumTo = checkNumReducer(cConf.getNumReducers(), pConf.getNumReducers());
    if (moveReducerNumTo == null || (moveReducerNumTo > 0 && cConf.getNumReducers() < minReducer)) {
        return null;
    }
    List<ExprNodeDesc> ckeys = cConf.getKeyCols();
    List<ExprNodeDesc> pkeys = pConf.getKeyCols();
    Integer moveKeyColTo = checkExprs(ckeys, pkeys, cRS, pRS);
    if (moveKeyColTo == null) {
        return null;
    }
    List<ExprNodeDesc> cpars = cConf.getPartitionCols();
    List<ExprNodeDesc> ppars = pConf.getPartitionCols();
    Integer movePartitionColTo = checkExprs(cpars, ppars, cRS, pRS);
    if (movePartitionColTo == null) {
        return null;
    }
    Integer moveNumDistKeyTo = checkNumDistributionKey(cConf.getNumDistributionKeys(), pConf.getNumDistributionKeys());
    return new int[] { moveKeyColTo, movePartitionColTo, moveRSOrderTo, moveReducerNumTo, moveNumDistKeyTo };
}
Also used : ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)
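
Each helper called above (checkOrder, checkNumReducer, checkExprs, checkNumDistributionKey) reports its result in the same -1/0/1/null convention described in the javadoc. A hedged sketch of what such a check could look like for reducer counts, assuming a negative count denotes "not set" (the real checkNumReducer may differ in detail):

public class MergeDirectionSketch {

    // Sketch of the -1/0/1/null convention, assuming a negative reducer count
    // means "not set" (as with ReduceSinkDesc.getNumReducers()).
    static Integer checkNumReducerSketch(int childReducers, int parentReducers) {
        if (childReducers < 0 && parentReducers < 0) {
            return 0;      // neither side sets it: same configuration
        }
        if (childReducers < 0) {
            return -1;     // only the parent sets it: parent is more specific
        }
        if (parentReducers < 0) {
            return 1;      // only the child sets it: child is more specific
        }
        return childReducers == parentReducers ? 0 : null;  // conflict: not mergeable
    }

    public static void main(String[] args) {
        System.out.println(checkNumReducerSketch(-1, -1));  // 0
        System.out.println(checkNumReducerSketch(-1, 10));  // -1
        System.out.println(checkNumReducerSketch(10, -1));  // 1
        System.out.println(checkNumReducerSketch(10, 20));  // null
    }
}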

Example 5 with ReduceSinkDesc

use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.

the class ExplainTask method outputPlan.

public JSONObject outputPlan(Object work, PrintStream out, boolean extended, boolean jsonOutput, int indent, String appendToHeader, boolean inTest) throws Exception {
    // Check if work has an explain annotation
    Annotation note = AnnotationUtils.getAnnotation(work.getClass(), Explain.class);
    String keyJSONObject = null;
    if (note instanceof Explain) {
        Explain xpl_note = (Explain) note;
        boolean invokeFlag = false;
        if (this.work != null && this.work.isUserLevelExplain()) {
            invokeFlag = Level.USER.in(xpl_note.explainLevels());
        } else {
            if (extended) {
                invokeFlag = Level.EXTENDED.in(xpl_note.explainLevels());
            } else {
                invokeFlag = Level.DEFAULT.in(xpl_note.explainLevels()) || (this.work != null && this.work.isDebug() && Level.DEBUG.in(xpl_note.explainLevels()));
            }
        }
        if (invokeFlag) {
            Vectorization vectorization = xpl_note.vectorization();
            if (this.work != null && this.work.isVectorization()) {
                invokeFlag = isInvokeVectorization(vectorization);
            } else {
                invokeFlag = isInvokeNonVectorization(vectorization);
            }
        }
        if (invokeFlag) {
            keyJSONObject = xpl_note.displayName();
            if (out != null) {
                out.print(indentString(indent));
                if (appendToHeader != null && !appendToHeader.isEmpty()) {
                    out.println(xpl_note.displayName() + appendToHeader);
                } else {
                    out.println(xpl_note.displayName());
                }
            }
        }
    }
    JSONObject json = jsonOutput ? new JSONObject(new LinkedHashMap<>()) : null;
    // If this is an operator then we need to call the plan generation on the
    // conf and then the children
    if (work instanceof Operator) {
        Operator<? extends OperatorDesc> operator = (Operator<? extends OperatorDesc>) work;
        if (operator.getConf() != null) {
            String appender = isLogical ? " (" + operator.getOperatorId() + ")" : "";
            JSONObject jsonOut = outputPlan(operator.getConf(), out, extended, jsonOutput, jsonOutput ? 0 : indent, appender, inTest);
            if (this.work != null && (this.work.isUserLevelExplain() || this.work.isFormatted())) {
                if (jsonOut != null && jsonOut.length() > 0) {
                    ((JSONObject) jsonOut.get(JSONObject.getNames(jsonOut)[0])).put("OperatorId:", operator.getOperatorId());
                }
                if (!this.work.isUserLevelExplain() && this.work.isFormatted() && operator.getConf() instanceof ReduceSinkDesc) {
                    ((JSONObject) jsonOut.get(JSONObject.getNames(jsonOut)[0])).put("outputname:", ((ReduceSinkDesc) operator.getConf()).getOutputName());
                }
            }
            if (jsonOutput) {
                json = jsonOut;
            }
        }
        if (!visitedOps.contains(operator) || !isLogical) {
            visitedOps.add(operator);
            if (operator.getChildOperators() != null) {
                int cindent = jsonOutput ? 0 : indent + 2;
                for (Operator<? extends OperatorDesc> op : operator.getChildOperators()) {
                    JSONObject jsonOut = outputPlan(op, out, extended, jsonOutput, cindent, "", inTest);
                    if (jsonOutput) {
                        ((JSONObject) json.get(JSONObject.getNames(json)[0])).accumulate("children", jsonOut);
                    }
                }
            }
        }
        if (jsonOutput) {
            return json;
        }
        return null;
    }
    // We look at all methods that generate values for explain
    Method[] methods = work.getClass().getMethods();
    Arrays.sort(methods, new MethodComparator());
    for (Method m : methods) {
        int prop_indents = jsonOutput ? 0 : indent + 2;
        note = AnnotationUtils.getAnnotation(m, Explain.class);
        if (note instanceof Explain) {
            Explain xpl_note = (Explain) note;
            boolean invokeFlag = false;
            if (this.work != null && this.work.isUserLevelExplain()) {
                invokeFlag = Level.USER.in(xpl_note.explainLevels());
            } else {
                if (extended) {
                    invokeFlag = Level.EXTENDED.in(xpl_note.explainLevels());
                } else {
                    invokeFlag = Level.DEFAULT.in(xpl_note.explainLevels()) || (this.work != null && this.work.isDebug() && Level.DEBUG.in(xpl_note.explainLevels()));
                }
            }
            if (invokeFlag) {
                Vectorization vectorization = xpl_note.vectorization();
                if (this.work != null && this.work.isVectorization()) {
                    invokeFlag = isInvokeVectorization(vectorization);
                } else {
                    invokeFlag = isInvokeNonVectorization(vectorization);
                }
            }
            if (invokeFlag) {
                Object val = null;
                try {
                    val = m.invoke(work);
                } catch (InvocationTargetException ex) {
                    // Ignore the exception, this may be caused by external jars
                    val = null;
                }
                if (val == null) {
                    continue;
                }
                if (xpl_note.jsonOnly() && !jsonOutput) {
                    continue;
                }
                String header = null;
                boolean skipHeader = xpl_note.skipHeader();
                boolean emptyHeader = false;
                if (!xpl_note.displayName().equals("")) {
                    header = indentString(prop_indents) + xpl_note.displayName() + ":";
                } else {
                    emptyHeader = true;
                    prop_indents = indent;
                    header = indentString(prop_indents);
                }
                // Try the output as a primitive object
                if (isPrintable(val)) {
                    if (out != null && shouldPrint(xpl_note, val)) {
                        if (!skipHeader) {
                            out.print(header);
                            out.print(" ");
                        }
                        out.println(val);
                    }
                    if (jsonOutput && shouldPrint(xpl_note, val)) {
                        json.put(header, val.toString());
                    }
                    continue;
                }
                int ind = 0;
                if (!jsonOutput) {
                    if (!skipHeader) {
                        ind = prop_indents + 2;
                    } else {
                        ind = indent;
                    }
                }
                // Try this as a map
                if (val instanceof Map) {
                    // Go through the map and print out the stuff
                    Map<?, ?> mp = (Map<?, ?>) val;
                    if (out != null && !skipHeader && mp != null && !mp.isEmpty()) {
                        out.print(header);
                    }
                    JSONObject jsonOut = outputMap(mp, !skipHeader && !emptyHeader, out, extended, jsonOutput, ind);
                    if (jsonOutput && !mp.isEmpty()) {
                        json.put(header, jsonOut);
                    }
                    continue;
                }
                // Try this as a list
                if (val instanceof List || val instanceof Set) {
                    List l = val instanceof List ? (List) val : new ArrayList((Set) val);
                    if (out != null && !skipHeader && l != null && !l.isEmpty()) {
                        out.print(header);
                    }
                    JSONArray jsonOut = outputList(l, out, !skipHeader && !emptyHeader, extended, jsonOutput, ind, inTest);
                    if (jsonOutput && !l.isEmpty()) {
                        json.put(header, jsonOut);
                    }
                    continue;
                }
                // Finally check if it is serializable
                try {
                    if (!skipHeader && out != null) {
                        out.println(header);
                    }
                    JSONObject jsonOut = outputPlan(val, out, extended, jsonOutput, ind, "", inTest);
                    if (jsonOutput && jsonOut != null && jsonOut.length() != 0) {
                        if (!skipHeader) {
                            json.put(header, jsonOut);
                        } else {
                            for (String k : JSONObject.getNames(jsonOut)) {
                                json.put(k, jsonOut.get(k));
                            }
                        }
                    }
                    continue;
                } catch (ClassCastException ce) {
                    // Ignore
                }
            }
        }
    }
    if (jsonOutput) {
        if (keyJSONObject != null) {
            JSONObject ret = new JSONObject(new LinkedHashMap<>());
            ret.put(keyJSONObject, json);
            return ret;
        }
        return json;
    }
    return null;
}
Also used : Set(java.util.Set) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) ArrayList(java.util.ArrayList) Vectorization(org.apache.hadoop.hive.ql.plan.Explain.Vectorization) LinkedHashMap(java.util.LinkedHashMap) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) Explain(org.apache.hadoop.hive.ql.plan.Explain) JSONArray(org.json.JSONArray) Method(java.lang.reflect.Method) Annotation(java.lang.annotation.Annotation) InvocationTargetException(java.lang.reflect.InvocationTargetException) JSONObject(org.json.JSONObject) JSONObject(org.json.JSONObject) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TreeMap(java.util.TreeMap)
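
The second half of outputPlan is a reflection walk: it sorts the work object's methods, keeps those carrying the Explain annotation, and renders each returned value as a primitive, map, list, or nested plan. A stripped-down sketch of that pattern using a hypothetical Describe annotation (Hive's real Explain annotation carries more attributes, such as explainLevels and vectorization):

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.lang.reflect.Method;

public class ExplainWalkSketch {

    // Hypothetical stand-in for org.apache.hadoop.hive.ql.plan.Explain.
    @Retention(RetentionPolicy.RUNTIME)
    @Target(ElementType.METHOD)
    @interface Describe {
        String displayName();
    }

    static class DemoDesc {
        @Describe(displayName = "key expressions")
        public String getKeys() { return "_col0, _col1"; }
    }

    // Invoke every annotated zero-arg getter and print "<displayName>: <value>",
    // mirroring the header-then-value structure of outputPlan.
    static void walk(Object work) throws Exception {
        for (Method m : work.getClass().getMethods()) {
            Describe note = m.getAnnotation(Describe.class);
            if (note != null && m.getParameterCount() == 0) {
                System.out.println(note.displayName() + ": " + m.invoke(work));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        walk(new DemoDesc());  // prints: key expressions: _col0, _col1
    }
}

The real method additionally handles indentation, JSON accumulation, and the skipHeader/jsonOnly flags, but the annotate-then-reflect core is the same shape.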

Aggregations

ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc): 50
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 31
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 31
ArrayList (java.util.ArrayList): 29
Operator (org.apache.hadoop.hive.ql.exec.Operator): 21
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 20
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 19
HashMap (java.util.HashMap): 18
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 17
LinkedHashMap (java.util.LinkedHashMap): 16
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 16
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 16
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 14
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 14
SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc): 13
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 12
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 11
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 11
LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator): 11
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 11