Example 1 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

From class HiveFilterVisitor, method visit.

/**
 * TODO: 1) isSamplingPred 2) sampleDesc 3) isSortedFilter.
 */
@Override
OpAttr visit(HiveFilter filterRel) throws SemanticException {
    OpAttr inputOpAf = hiveOpConverter.dispatch(filterRel.getInput());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + filterRel.getId() + ":" + filterRel.getRelTypeName() + " with row type: [" + filterRel.getRowType() + "]");
    }
    ExprNodeDesc filCondExpr = filterRel.getCondition().accept(new ExprNodeConverter(inputOpAf.tabAlias, filterRel.getInput().getRowType(), inputOpAf.vcolsInCalcite, filterRel.getCluster().getTypeFactory(), true));
    FilterDesc filDesc = new FilterDesc(filCondExpr, false);
    ArrayList<ColumnInfo> cinfoLst = HiveOpConverterUtils.createColInfos(inputOpAf.inputs.get(0));
    FilterOperator filOp = (FilterOperator) OperatorFactory.getAndMakeChild(filDesc, new RowSchema(cinfoLst), inputOpAf.inputs.get(0));
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + filOp + " with row schema: [" + filOp.getSchema() + "]");
    }
    return inputOpAf.clone(filOp);
}
Also used : FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ExprNodeConverter(org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter)
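
The pattern in this visitor recurs across the package: dispatch the input rel to obtain its OpAttr, build a descriptor and operator for the current rel, and return the input's OpAttr cloned around the new operator, so the table alias and virtual-column positions are threaded through unchanged. Below is a minimal, self-contained sketch of that contract; Op and OpAttr here are simplified stand-ins invented for illustration (not Hive's real classes), and only the field names and the clone-with-new-root behavior mirror the example above.

import java.util.List;
import java.util.Set;

public class OpAttrSketch {
    // Stand-in for a Hive Operator tree node.
    record Op(String name) {}

    // Stand-in for HiveOpConverter.OpAttr: table alias, virtual-column
    // positions in the Calcite row type, and the current root operator(s).
    record OpAttr(String tabAlias, Set<Integer> vcolsInCalcite, List<Op> inputs) {
        // Models OpAttr.clone(Operator...): same alias and vcols, new root.
        OpAttr cloneWith(Op newRoot) {
            return new OpAttr(tabAlias, vcolsInCalcite, List.of(newRoot));
        }
    }

    public static void main(String[] args) {
        // What hiveOpConverter.dispatch(filterRel.getInput()) would hand back.
        OpAttr inputOpAf = new OpAttr("t1", Set.of(3), List.of(new Op("TS[t1]")));
        // What the visitor creates from the rel's condition.
        Op filOp = new Op("FIL");
        // Alias and vcols pass through; only the root operator changes.
        OpAttr result = inputOpAf.cloneWith(filOp);
        System.out.println(result);
        // OpAttr[tabAlias=t1, vcolsInCalcite=[3], inputs=[Op[name=FIL]]]
    }
}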

Example 2 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

From class HiveProjectVisitor, method visit.

@Override
OpAttr visit(HiveProject projectRel) throws SemanticException {
    OpAttr inputOpAf = hiveOpConverter.dispatch(projectRel.getInput());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + projectRel.getId() + ":" + projectRel.getRelTypeName() + " with row type: [" + projectRel.getRowType() + "]");
    }
    WindowingSpec windowingSpec = new WindowingSpec();
    List<String> exprNames = new ArrayList<String>(projectRel.getRowType().getFieldNames());
    List<ExprNodeDesc> exprCols = new ArrayList<ExprNodeDesc>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    for (int pos = 0; pos < projectRel.getProjects().size(); pos++) {
        ExprNodeConverter converter = new ExprNodeConverter(inputOpAf.tabAlias, projectRel.getRowType().getFieldNames().get(pos), projectRel.getInput().getRowType(), projectRel.getRowType(), inputOpAf.vcolsInCalcite, projectRel.getCluster().getTypeFactory(), true);
        ExprNodeDesc exprCol = projectRel.getProjects().get(pos).accept(converter);
        colExprMap.put(exprNames.get(pos), exprCol);
        exprCols.add(exprCol);
        // TODO: Should cols that come through a PTF retain their virtual-column-ness?
        if (converter.getWindowFunctionSpec() != null) {
            for (WindowFunctionSpec wfs : converter.getWindowFunctionSpec()) {
                windowingSpec.addWindowFunction(wfs);
            }
        }
    }
    if (windowingSpec.getWindowExpressions() != null && !windowingSpec.getWindowExpressions().isEmpty()) {
        inputOpAf = genPTF(inputOpAf, windowingSpec);
    }
    // TODO: Is this a safe assumption (name collisions, external names, ...)?
    SelectDesc sd = new SelectDesc(exprCols, exprNames);
    Pair<ArrayList<ColumnInfo>, Set<Integer>> colInfoVColPair = createColInfos(projectRel.getProjects(), exprCols, exprNames, inputOpAf);
    SelectOperator selOp = (SelectOperator) OperatorFactory.getAndMakeChild(sd, new RowSchema(colInfoVColPair.getKey()), inputOpAf.inputs.get(0));
    selOp.setColumnExprMap(colExprMap);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + selOp + " with row schema: [" + selOp.getSchema() + "]");
    }
    return new OpAttr(inputOpAf.tabAlias, colInfoVColPair.getValue(), selOp);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr) WindowingSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec) WindowFunctionSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec) ExprNodeConverter(org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc)
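
Note the lockstep bookkeeping in the loop above: exprNames, exprCols, and colExprMap are all keyed by the project position, so position i of the Calcite project yields the i-th output name, the i-th converted expression, and one map entry tying them together. A small sketch of that invariant, with plain strings standing in for RexNode and ExprNodeDesc (the data below is made up for illustration):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class ProjectBookkeepingSketch {
    public static void main(String[] args) {
        // Stand-ins for projectRel.getRowType().getFieldNames() and getProjects().
        List<String> fieldNames = List.of("_col0", "_col1");
        List<String> projects = List.of("a + 1", "b");
        List<String> exprNames = new ArrayList<>(fieldNames);
        List<String> exprCols = new ArrayList<>();
        // LinkedHashMap only so the printout below is deterministic;
        // the real code uses a HashMap.
        Map<String, String> colExprMap = new LinkedHashMap<>();
        for (int pos = 0; pos < projects.size(); pos++) {
            // Stands in for projectRel.getProjects().get(pos).accept(converter).
            String exprCol = "expr(" + projects.get(pos) + ")";
            colExprMap.put(exprNames.get(pos), exprCol);
            exprCols.add(exprCol);
        }
        System.out.println(colExprMap); // {_col0=expr(a + 1), _col1=expr(b)}
    }
}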

Example 3 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

From class HiveGBOpConvUtil, method genReduceSideGB2.

private static OpAttr genReduceSideGB2(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
    ArrayList<String> outputColNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    String colOutputName = null;
    ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
    List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
    ColumnInfo ci;
    // 1. Build GB Keys, grouping set starting position
    // 1.1 First Add original GB Keys
    ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, gbInfo.gbKeys.size() - 1, false, false);
    for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
        ci = rsColInfoLst.get(i);
        colOutputName = gbInfo.outputColNames.get(i);
        outputColNames.add(colOutputName);
        colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false));
        colExprMap.put(colOutputName, gbKeys.get(i));
    }
    // 1.2 Add GrpSet Col
    int groupingSetsPosition = -1;
    if (inclGrpSetInReduceSide(gbInfo) && gbInfo.grpIdFunctionNeeded) {
        groupingSetsPosition = gbKeys.size();
        ExprNodeDesc grpSetColExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, rsColInfoLst.get(groupingSetsPosition).getInternalName(), null, false);
        gbKeys.add(grpSetColExpr);
        colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1);
        outputColNames.add(colOutputName);
        colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true));
        colExprMap.put(colOutputName, grpSetColExpr);
    }
    // 2. Add UDAF
    UDAFAttrs udafAttr;
    ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
    int udafStartPosInGBInfOutputColNames = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() * 2;
    int udafStartPosInInputRS = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() + 1;
    for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
        udafAttr = gbInfo.udafAttrs.get(i);
        ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
        aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafStartPosInInputRS + i)));
        colOutputName = gbInfo.outputColNames.get(udafStartPosInGBInfOutputColNames + i);
        outputColNames.add(colOutputName);
        Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.FINAL, udafAttr.isDistinctUDAF);
        GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, aggParameters);
        aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, false, udafMode));
        colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
    }
    Operator rsGBOp2 = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.FINAL, outputColNames, gbKeys, aggregations, false, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, gbInfo.minReductionHashAggr, gbInfo.minReductionHashAggrLowerBound, null, false, groupingSetsPosition, gbInfo.containsDistinctAggr), new RowSchema(colInfoLst), rs);
    rsGBOp2.setColumnExprMap(colExprMap);
    // TODO: Shouldn't we propagate vc? Is it the vc col from the table, or all vcs?
    return new OpAttr("", new HashSet<Integer>(), rsGBOp2);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) HashMap(java.util.HashMap) GenericUDAFInfo(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)
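
The two start positions computed before the UDAF loop encode where the aggregate columns begin in two different layouts. In GBInfo.outputColNames the UDAF names start after the group-by key names, or after twice as many entries when grouping sets are present (the key names appear to be carried twice in that case); in the input ReduceSink's schema, grouping sets contribute exactly one extra column (the grouping-set id), so the UDAF columns start one position later. A worked example with illustrative numbers:

public class GB2OffsetSketch {
    // Mirrors the two offset computations in genReduceSideGB2.
    static void show(int numGbKeys, boolean hasGroupingSets) {
        int udafStartPosInGBInfOutputColNames =
            !hasGroupingSets ? numGbKeys : numGbKeys * 2;
        int udafStartPosInInputRS =
            !hasGroupingSets ? numGbKeys : numGbKeys + 1;
        System.out.printf("keys=%d, groupingSets=%b -> outputColNames@%d, inputRS@%d%n",
            numGbKeys, hasGroupingSets,
            udafStartPosInGBInfOutputColNames, udafStartPosInInputRS);
    }

    public static void main(String[] args) {
        show(2, false); // keys=2, groupingSets=false -> outputColNames@2, inputRS@2
        show(2, true);  // keys=2, groupingSets=true  -> outputColNames@4, inputRS@3
    }
}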

Example 4 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

From class HiveGBOpConvUtil, method genNoMapSideGBSkew.

/**
 * Generates the plan shape RS-GB1-RS-GB2.
 *
 * @param inputOpAf attributes of the operator produced for the aggregate's input
 * @param aggRel the Calcite aggregate being translated
 * @param gbInfo precomputed group-by information for the aggregate
 * @return the OpAttr wrapping the final reduce-side group-by operator
 * @throws SemanticException
 */
private static OpAttr genNoMapSideGBSkew(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException {
    OpAttr mapSideRS = null;
    OpAttr reduceSideGB1NoMapGB = null;
    OpAttr reduceSideRS = null;
    OpAttr reduceSideGB2 = null;
    // 1. Insert MapSide RS
    mapSideRS = genMapSideRS(inputOpAf, gbInfo);
    // 2. Insert ReduceSide GB
    reduceSideGB1NoMapGB = genReduceSideGB1NoMapGB(mapSideRS, gbInfo, GroupByDesc.Mode.PARTIAL1);
    // 3. Insert RS on reduce side with Reduce side GB as input
    reduceSideRS = genReduceGBRS(reduceSideGB1NoMapGB, gbInfo);
    // 4. Insert ReduceSide GB2
    reduceSideGB2 = genReduceSideGB2(reduceSideRS, gbInfo);
    return reduceSideGB2;
}
Also used : OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr)
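
Each gen* helper consumes the OpAttr of the stage below it and returns the OpAttr whose root is the operator it just created, so plan shapes like RS-GB1-RS-GB2 fall out of simple sequential chaining. A sketch of that composition, with a hypothetical PlanState standing in for OpAttr:

import java.util.List;
import java.util.function.UnaryOperator;

public class StageChainSketch {
    // Stand-in for OpAttr: tracks only the shape of the plan built so far.
    record PlanState(String plan) {}

    public static void main(String[] args) {
        List<UnaryOperator<PlanState>> stages = List.of(
            s -> new PlanState(s.plan() + "-RS"),   // genMapSideRS
            s -> new PlanState(s.plan() + "-GB1"),  // genReduceSideGB1NoMapGB
            s -> new PlanState(s.plan() + "-RS"),   // genReduceGBRS
            s -> new PlanState(s.plan() + "-GB2")); // genReduceSideGB2
        PlanState state = new PlanState("TS");      // start from the input
        for (UnaryOperator<PlanState> stage : stages) {
            state = stage.apply(state);             // thread the OpAttr through
        }
        System.out.println(state.plan());           // TS-RS-GB1-RS-GB2
    }
}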

Example 5 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

From class HiveGBOpConvUtil, method genMapSideGBSkewGBKeysAndDistUDAFNotPresent.

/**
 * Generates the plan shape GB-RS-GB2.
 *
 * @param inputOpAf attributes of the operator produced for the aggregate's input
 * @param aggRel the Calcite aggregate being translated
 * @param gbInfo precomputed group-by information for the aggregate
 * @return the OpAttr wrapping the final reduce-side group-by operator
 * @throws SemanticException
 */
private static OpAttr genMapSideGBSkewGBKeysAndDistUDAFNotPresent(OpAttr inputOpAf, HiveAggregate aggRel, GBInfo gbInfo) throws SemanticException {
    OpAttr mapSideGB = null;
    OpAttr mapSideRS = null;
    OpAttr reduceSideGB2 = null;
    // 1. Sanity check
    if (gbInfo.grpSetRqrAdditionalMRJob) {
        String errorMsg = "The number of rows per input row due to grouping sets is " + gbInfo.grpSets.size();
        throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg));
    }
    // 2. Insert MapSide GB
    mapSideGB = genMapSideGB(inputOpAf, gbInfo);
    // 3. Insert MapSide RS
    mapSideRS = genMapSideGBRS(mapSideGB, gbInfo);
    // 4. Insert ReduceSide GB2
    reduceSideGB2 = genReduceSideGB2(mapSideRS, gbInfo);
    return reduceSideGB2;
}
Also used : OpAttr(org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
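
Unlike the previous variant, this one starts with a sanity check: grouping sets that would require an additional MR job cannot be combined with skew handling, so translation fails fast before any operators are built. A minimal sketch of that guard; the condition and message are modeled on the code above, and the exception class here is a local stand-in for Hive's SemanticException:

public class GroupingSetGuardSketch {
    // Local stand-in for org.apache.hadoop.hive.ql.parse.SemanticException.
    static class SemanticException extends Exception {
        SemanticException(String msg) { super(msg); }
    }

    static void genMapSideGBSkew(boolean grpSetRqrAdditionalMRJob, int numGrpSets)
            throws SemanticException {
        // Fail fast: reject the GB-RS-GB2 shape before building any stage.
        if (grpSetRqrAdditionalMRJob) {
            throw new SemanticException(
                "The number of rows per input row due to grouping sets is " + numGrpSets);
        }
        System.out.println("building GB -> RS -> GB2");
    }

    public static void main(String[] args) {
        try {
            genMapSideGBSkew(true, 4);
        } catch (SemanticException e) {
            System.out.println("rejected: " + e.getMessage());
        }
    }
}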

Aggregations

OpAttr (org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr): 21
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 13
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 13
ArrayList (java.util.ArrayList): 12
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 12
HashMap (java.util.HashMap): 9
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 9
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 6
Operator (org.apache.hadoop.hive.ql.exec.Operator): 6
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 5
HashSet (java.util.HashSet): 4
GenericUDAFInfo (org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo): 4
AggregationDesc (org.apache.hadoop.hive.ql.plan.AggregationDesc): 4
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 4
GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc): 4
Mode (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode): 4
ExprNodeConverter (org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter): 3
ImmutableList (com.google.common.collect.ImmutableList): 2
List (java.util.List): 2
RexNode (org.apache.calcite.rex.RexNode): 2