Example 16 with OpAttr

use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

the class HiveGBOpConvUtil method genMapSideGBRS.

private static OpAttr genMapSideGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<String> outputKeyColumnNames = new ArrayList<String>();
    List<String> outputValueColumnNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    GroupByOperator mapGB = (GroupByOperator) inputOpAf.inputs.get(0);
    ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(mapGB, 0, gbInfo.gbKeys.size() - 1,
            outputKeyColumnNames, false, colInfoLst, colExprMap, false, false);
    int keyLength = reduceKeys.size();
    if (inclGrpSetInMapSide(gbInfo)) {
        addGrpSetCol(false, SemanticAnalyzer.getColumnInternalName(reduceKeys.size()), true, reduceKeys, outputKeyColumnNames, colInfoLst, colExprMap);
        keyLength++;
    }
    if (mapGB.getConf().getKeys().size() > reduceKeys.size()) {
        // NOTE: all distinct cols share a single output col name
        reduceKeys.addAll(getReduceKeysForRS(mapGB, reduceKeys.size(), mapGB.getConf().getKeys().size() - 1,
                outputKeyColumnNames, true, colInfoLst, colExprMap, false, false));
    } else if (!gbInfo.distColIndices.isEmpty()) {
        // This is the case where distinct cols are part of the GB keys, in which case
        // we still need to add them to the output col names
        outputKeyColumnNames.add(SemanticAnalyzer.getColumnInternalName(reduceKeys.size()));
    }
    ArrayList<ExprNodeDesc> reduceValues = getValueKeysForRS(mapGB, mapGB.getConf().getKeys().size(),
            outputValueColumnNames, colInfoLst, colExprMap, false, false);
    ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
            PlanUtils.getReduceSinkDesc(reduceKeys, keyLength, reduceValues, gbInfo.distColIndices,
                    outputKeyColumnNames, outputValueColumnNames, true, -1,
                    getNumPartFieldsForMapSideRS(gbInfo), getParallelismForMapSideRS(gbInfo),
                    AcidUtils.Operation.NOT_ACID, gbInfo.defaultNullOrder),
            new RowSchema(colInfoLst), mapGB);
    rsOp.setColumnExprMap(colExprMap);
    return new OpAttr("", new HashSet<Integer>(), rsOp);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
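
These examples all follow the same handshake: unwrap the child operator from inputOpAf.inputs.get(0), build the next operator with OperatorFactory.getAndMakeChild, usually attach the column-expression map, and re-wrap the result in a fresh OpAttr. As a rough mental model, the following is a hypothetical, self-contained stand-in for that holder; the real OpAttr is an inner class of HiveOpConverter, and this sketch only mirrors the three members the examples touch.

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Hypothetical, simplified stand-in for HiveOpConverter.OpAttr (not the real class).
public class OpAttrSketch {
    final String tabAlias;              // derived-table alias; "" when none applies
    final Set<Integer> vcolsInCalcite;  // positions of virtual columns tracked by Calcite
    final List<Object> inputs;          // wrapped operator(s); callers read inputs.get(0)

    OpAttrSketch(String tabAlias, Set<Integer> vcolsInCalcite, Object... inputs) {
        this.tabAlias = tabAlias;
        this.vcolsInCalcite = vcolsInCalcite;
        this.inputs = Arrays.asList(inputs);
    }

    public static void main(String[] args) {
        Object rsOp = new Object(); // stands in for the ReduceSinkOperator built above
        OpAttrSketch attr = new OpAttrSketch("", new HashSet<Integer>(), rsOp);
        System.out.println(attr.inputs.size()); // prints 1
    }
}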

Example 17 with OpAttr

use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

the class HiveGBOpConvUtil method genReduceSideGB1NoMapGB.

/**
 * RS-GB0: generates the reduce-side GB over the input RS when there is no map-side GB.
 *
 * @param inputOpAf operator attributes wrapping the input ReduceSinkOperator
 * @param gbInfo group-by metadata (GB keys, UDAFs, output column names)
 * @param gbMode mode for the generated GroupByDesc
 * @return operator attributes wrapping the new reduce-side GroupByOperator
 * @throws SemanticException if the group-by plan cannot be generated
 */
private static OpAttr genReduceSideGB1NoMapGB(OpAttr inputOpAf, GBInfo gbInfo, GroupByDesc.Mode gbMode) throws SemanticException {
    ArrayList<String> outputColNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    String colOutputName = null;
    ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
    List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
    ColumnInfo ci;
    boolean useOriginalGBNames = (gbInfo.gbPhysicalPipelineMode == HIVEGBPHYSICALMODE.NO_MAP_SIDE_GB_NO_SKEW);
    // 1. Build GB Keys, grouping set starting position
    // 1.1 First Add original GB Keys
    ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, gbInfo.gbKeys.size() - 1, true, false);
    for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
        ci = rsColInfoLst.get(i);
        if (useOriginalGBNames) {
            colOutputName = gbInfo.outputColNames.get(i);
        } else {
            colOutputName = SemanticAnalyzer.getColumnInternalName(i);
        }
        outputColNames.add(colOutputName);
        colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), null, false));
        colExprMap.put(colOutputName, gbKeys.get(i));
    }
    // 2. Walk through UDAF and add them to GB
    String lastReduceKeyColName = null;
    if (!rs.getConf().getOutputKeyColumnNames().isEmpty()) {
        lastReduceKeyColName = rs.getConf().getOutputKeyColumnNames().get(rs.getConf().getOutputKeyColumnNames().size() - 1);
    }
    int numDistinctUDFs = 0;
    List<ExprNodeDesc> reduceValues = rs.getConf().getValueCols();
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    int udafColStartPosInOriginalGB = gbInfo.gbKeys.size();
    // the positions in rsColInfoLst are as follows
    // --grpkey--,--distkey--,--values--
    // but distUDAF may be before/after some non-distUDAF,
    // i.e., their positions can be mixed.
    // So for each UDAF parameter we first check whether it is a group-by key;
    // if not, whether it is a distinct key; otherwise it must be a value.
    Map<Integer, List<ExprNodeDesc>> indexToParameter = new TreeMap<>();
    for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
        UDAFAttrs udafAttr = gbInfo.udafAttrs.get(i);
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        ColumnInfo rsUDAFParamColInfo;
        ExprNodeDesc udafParam;
        ExprNodeDesc constantPropDistinctUDAFParam;
        for (int j = 0; j < udafAttr.udafParams.size(); j++) {
            int argPos = getColInfoPos(udafAttr.udafParams.get(j), gbInfo);
            rsUDAFParamColInfo = rsColInfoLst.get(argPos);
            String rsUDAFParamName = rsUDAFParamColInfo.getInternalName();
            if (udafAttr.isDistinctUDAF && lastReduceKeyColName != null) {
                rsUDAFParamName = Utilities.ReduceField.KEY.name() + "." + lastReduceKeyColName + ":" + numDistinctUDFs + "." + SemanticAnalyzer.getColumnInternalName(j);
            }
            udafParam = new ExprNodeColumnDesc(rsUDAFParamColInfo.getType(), rsUDAFParamName, rsUDAFParamColInfo.getTabAlias(), rsUDAFParamColInfo.getIsVirtualCol());
            constantPropDistinctUDAFParam = SemanticAnalyzer.isConstantParameterInAggregationParameters(rsUDAFParamColInfo.getInternalName(), reduceValues);
            if (constantPropDistinctUDAFParam != null) {
                udafParam = constantPropDistinctUDAFParam;
            }
            aggParameters.add(udafParam);
        }
        indexToParameter.put(i, aggParameters);
        if (udafAttr.isDistinctUDAF) {
            numDistinctUDFs++;
        }
    }
    for (Map.Entry<Integer, List<ExprNodeDesc>> e : indexToParameter.entrySet()) {
        UDAFAttrs udafAttr = gbInfo.udafAttrs.get(e.getKey());
        Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(gbMode, udafAttr.isDistinctUDAF);
        GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, e.getValue());
        aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, udafAttr.isDistinctUDAF, udafMode));
        if (useOriginalGBNames) {
            colOutputName = gbInfo.outputColNames.get(udafColStartPosInOriginalGB + e.getKey());
        } else {
            colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1);
        }
        colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
        outputColNames.add(colOutputName);
    }
    Operator rsGB1 = OperatorFactory.getAndMakeChild(
            new GroupByDesc(gbMode, outputColNames, gbKeys, aggregations, false,
                    gbInfo.groupByMemoryUsage, gbInfo.minReductionHashAggrLowerBound,
                    gbInfo.memoryThreshold, gbInfo.minReductionHashAggr, null, false, -1,
                    numDistinctUDFs > 0),
            new RowSchema(colInfoLst), rs);
    rsGB1.setColumnExprMap(colExprMap);
    return new OpAttr("", new HashSet<Integer>(), rsGB1);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) HashMap(java.util.HashMap) GenericUDAFInfo(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) TreeMap(java.util.TreeMap) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) Map(java.util.Map)
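
The TreeMap does real work in this method: a first pass resolves parameters keyed by UDAF index (counting distinct UDAFs along the way), and a second pass emits aggregations in ascending index order, so output columns line up with the original UDAF positions even when distinct and non-distinct UDAFs are interleaved. Below is a minimal runnable sketch of that two-pass pattern, with plain strings standing in for Hive's expression descriptors; all names and sample values are illustrative.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class TwoPassAggSketch {
    public static void main(String[] args) {
        // Pass 1: resolve parameters per UDAF, keyed by original UDAF index.
        Map<Integer, List<String>> indexToParameter = new TreeMap<>();
        String[][] udafParams = { { "KEY._col2:0._col0" }, { "VALUE._col0" } }; // illustrative
        for (int i = 0; i < udafParams.length; i++) {
            List<String> params = new ArrayList<>();
            for (String p : udafParams[i]) {
                params.add(p); // in Hive this would build an ExprNodeColumnDesc
            }
            indexToParameter.put(i, params);
        }
        // Pass 2: TreeMap iteration restores ascending index order, so the
        // emitted aggregations match the UDAFs' positions in the query.
        List<String> aggregations = new ArrayList<>();
        for (Map.Entry<Integer, List<String>> e : indexToParameter.entrySet()) {
            aggregations.add("agg" + e.getKey() + e.getValue());
        }
        System.out.println(aggregations); // [agg0[KEY._col2:0._col0], agg1[VALUE._col0]]
    }
}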

Example 18 with OpAttr

use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

the class HiveSortExchangeVisitor method visit.

@Override
OpAttr visit(HiveSortExchange exchangeRel) throws SemanticException {
    OpAttr inputOpAf = hiveOpConverter.dispatch(exchangeRel.getInput());
    String tabAlias = inputOpAf.tabAlias;
    if (tabAlias == null || tabAlias.length() == 0) {
        tabAlias = hiveOpConverter.getHiveDerivedTableAlias();
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + exchangeRel.getId() + ":" + exchangeRel.getRelTypeName() + " with row type: [" + exchangeRel.getRowType() + "]");
    }
    RelDistribution distribution = exchangeRel.getDistribution();
    if (distribution.getType() != Type.HASH_DISTRIBUTED) {
        throw new SemanticException("Only hash distribution supported for LogicalExchange");
    }
    ExprNodeDesc[] expressions = new ExprNodeDesc[exchangeRel.getKeys().size()];
    for (int index = 0; index < exchangeRel.getKeys().size(); index++) {
        expressions[index] = HiveOpConverterUtils.convertToExprNode(exchangeRel.getKeys().get(index), exchangeRel.getInput(), inputOpAf.tabAlias, inputOpAf.vcolsInCalcite);
    }
    exchangeRel.setKeyExpressions(expressions);
    ReduceSinkOperator rsOp = genReduceSink(inputOpAf.inputs.get(0), tabAlias, expressions, -1, -1, Operation.NOT_ACID, hiveOpConverter.getHiveConf());
    return new OpAttr(tabAlias, inputOpAf.vcolsInCalcite, rsOp);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RelDistribution(org.apache.calcite.rel.RelDistribution) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
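
Two small conventions recur in this visitor: an empty or null input alias is replaced by a generated derived-table alias, and any distribution other than hash is rejected before key conversion begins. The following compact sketch shows both guards under stated assumptions; DistributionType, deriveAlias, and the alias format are stand-ins, not Hive API.

// Illustrative guards only; the enum and helpers below are hypothetical.
enum DistributionType { HASH_DISTRIBUTED, RANGE_DISTRIBUTED, SINGLETON }

public class ExchangeGuardSketch {
    private static int aliasCounter = 0;

    static String deriveAlias() {
        return "$hdt$_" + aliasCounter++; // Hive's derived aliases look similar to this
    }

    static String resolveAlias(String tabAlias) {
        // Empty or null alias: fall back to a generated derived-table alias.
        return (tabAlias == null || tabAlias.isEmpty()) ? deriveAlias() : tabAlias;
    }

    static void checkDistribution(DistributionType type) {
        if (type != DistributionType.HASH_DISTRIBUTED) {
            throw new IllegalArgumentException("Only hash distribution supported for LogicalExchange");
        }
    }

    public static void main(String[] args) {
        System.out.println(resolveAlias(""));                 // generated alias
        checkDistribution(DistributionType.HASH_DISTRIBUTED); // passes silently
    }
}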

Example 19 with OpAttr

use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

the class HiveTableFunctionScanVisitor method visit.

@Override
OpAttr visit(HiveTableFunctionScan scanRel) throws SemanticException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + scanRel.getId() + ":" + scanRel.getRelTypeName() + " with row type: [" + scanRel.getRowType() + "]");
    }
    RexCall call = (RexCall) scanRel.getCall();
    RowResolver rowResolver = new RowResolver();
    List<String> fieldNames = new ArrayList<>(scanRel.getRowType().getFieldNames());
    List<String> functionFieldNames = new ArrayList<>();
    List<ExprNodeDesc> exprCols = new ArrayList<>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
    for (int pos = 0; pos < call.getOperands().size(); pos++) {
        ExprNodeConverter converter = new ExprNodeConverter(SemanticAnalyzer.DUMMY_TABLE, fieldNames.get(pos),
                scanRel.getRowType(), scanRel.getRowType(),
                ((HiveTableScan) scanRel.getInput(0)).getPartOrVirtualCols(),
                scanRel.getCluster().getTypeFactory(), true);
        ExprNodeDesc exprCol = call.getOperands().get(pos).accept(converter);
        colExprMap.put(HiveConf.getColumnInternalName(pos), exprCol);
        exprCols.add(exprCol);
        ColumnInfo columnInfo = new ColumnInfo(HiveConf.getColumnInternalName(pos), exprCol.getWritableObjectInspector(), SemanticAnalyzer.DUMMY_TABLE, false);
        rowResolver.put(columnInfo.getTabAlias(), columnInfo.getAlias(), columnInfo);
        functionFieldNames.add(HiveConf.getColumnInternalName(pos));
    }
    OpAttr inputOpAf = hiveOpConverter.dispatch(scanRel.getInputs().get(0));
    TableScanOperator op = (TableScanOperator) inputOpAf.inputs.get(0);
    op.getConf().setRowLimit(1);
    Operator<?> output = OperatorFactory.getAndMakeChild(new SelectDesc(exprCols, functionFieldNames, false), new RowSchema(rowResolver.getRowSchema()), op);
    output.setColumnExprMap(colExprMap);
    Operator<?> funcOp = genUDTFPlan(call, functionFieldNames, output, rowResolver);
    return new OpAttr(null, new HashSet<Integer>(), funcOp);
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr) RowResolver(org.apache.hadoop.hive.ql.parse.RowResolver) ExprNodeConverter(org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter) RexCall(org.apache.calcite.rex.RexCall) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
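
Each UDTF operand gets a positional internal name from HiveConf.getColumnInternalName(pos), and that same name keys both colExprMap and the output field list, which keeps the SelectDesc schema and the expression map in sync. A tiny runnable sketch of the convention, assuming the internal name is "_col" + pos:

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class InternalNameSketch {
    // Assumed stand-in for HiveConf.getColumnInternalName(pos).
    static String columnInternalName(int pos) {
        return "_col" + pos;
    }

    public static void main(String[] args) {
        List<String> operands = List.of("a + 1", "upper(b)"); // illustrative expressions
        Map<String, String> colExprMap = new LinkedHashMap<>();
        List<String> functionFieldNames = new ArrayList<>();
        for (int pos = 0; pos < operands.size(); pos++) {
            // The same positional name keys the expression map and the schema.
            colExprMap.put(columnInternalName(pos), operands.get(pos));
            functionFieldNames.add(columnInternalName(pos));
        }
        System.out.println(functionFieldNames); // [_col0, _col1]
    }
}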

Example 20 with OpAttr

use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

the class HiveUnionVisitor method visit.

@Override
OpAttr visit(HiveUnion unionRel) throws SemanticException {
    // 1. Convert inputs
    List<RelNode> inputsList = extractRelNodeFromUnion(unionRel);
    OpAttr[] inputs = new OpAttr[inputsList.size()];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = hiveOpConverter.dispatch(inputsList.get(i));
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + unionRel.getId() + ":" + unionRel.getRelTypeName() + " with row type: [" + unionRel.getRowType() + "]");
    }
    // 2. Create a new union operator
    UnionDesc unionDesc = new UnionDesc();
    unionDesc.setNumInputs(inputs.length);
    String tableAlias = hiveOpConverter.getHiveDerivedTableAlias();
    ArrayList<ColumnInfo> cinfoLst = createColInfos(inputs[0].inputs.get(0), tableAlias);
    Operator<?>[] children = new Operator<?>[inputs.length];
    for (int i = 0; i < children.length; i++) {
        if (i == 0) {
            children[i] = inputs[i].inputs.get(0);
        } else {
            Operator<?> op = inputs[i].inputs.get(0);
            // We need to check whether the other union input branches follow the first branch;
            // we may need to cast the data types of specific columns (see the sketch after this example).
            children[i] = genInputSelectForUnion(op, cinfoLst);
        }
    }
    Operator<? extends OperatorDesc> unionOp = OperatorFactory.getAndMakeChild(
            hiveOpConverter.getSemanticAnalyzer().getOpContext(), unionDesc,
            new RowSchema(cinfoLst), children);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + unionOp + " with row schema: [" + unionOp.getSchema() + "]");
    }
    // 3. Return result
    return new OpAttr(tableAlias, inputs[0].vcolsInCalcite, unionOp);
}
Also used : UnionDesc(org.apache.hadoop.hive.ql.plan.UnionDesc) Operator(org.apache.hadoop.hive.ql.exec.Operator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr) RelNode(org.apache.calcite.rel.RelNode)
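
Branch 0 acts as the schema authority for the union: every later branch is compared against the first branch's column infos and, where types diverge, genInputSelectForUnion wraps it in a select that casts. Here is a minimal sketch of that alignment decision; alignBranch is a hypothetical stand-in, with column types modeled as strings.

import java.util.List;

public class UnionAlignSketch {
    // Hypothetical stand-in for genInputSelectForUnion: wrap a branch in a
    // casting select only when its column types diverge from the first branch's.
    static String alignBranch(String branch, List<String> branchTypes, List<String> firstTypes) {
        return branchTypes.equals(firstTypes) ? branch : "select-with-casts(" + branch + ")";
    }

    public static void main(String[] args) {
        List<String> first = List.of("int", "string");
        System.out.println(alignBranch("branch1", List.of("int", "string"), first));    // unchanged
        System.out.println(alignBranch("branch2", List.of("bigint", "string"), first)); // wrapped
    }
}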

Aggregations

OpAttr (org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr): 21
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 13
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 13
ArrayList (java.util.ArrayList): 12
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 12
HashMap (java.util.HashMap): 9
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 9
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 6
Operator (org.apache.hadoop.hive.ql.exec.Operator): 6
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 5
HashSet (java.util.HashSet): 4
GenericUDAFInfo (org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo): 4
AggregationDesc (org.apache.hadoop.hive.ql.plan.AggregationDesc): 4
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 4
GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc): 4
Mode (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode): 4
ExprNodeConverter (org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter): 3
ImmutableList (com.google.common.collect.ImmutableList): 2
List (java.util.List): 2
RexNode (org.apache.calcite.rex.RexNode): 2