
Example 6 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

From the class HiveSortLimitVisitor, method visit:

@Override
OpAttr visit(HiveSortLimit sortRel) throws SemanticException {
    OpAttr inputOpAf = hiveOpConverter.dispatch(sortRel.getInput());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " with row type: [" + sortRel.getRowType() + "]");
        if (sortRel.getCollation() == RelCollations.EMPTY) {
            LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of limit");
        } else if (sortRel.fetch == null) {
            LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of sort");
        } else {
            LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of sort+limit");
        }
    }
    Operator<?> inputOp = inputOpAf.inputs.get(0);
    Operator<?> resultOp = inputOpAf.inputs.get(0);
    // 1. If we need to sort tuples based on the value of some of their columns
    if (sortRel.getCollation() != RelCollations.EMPTY) {
        // In strict mode, in the presence of order by, limit must be specified.
        if (sortRel.fetch == null) {
            String error = StrictChecks.checkNoLimit(hiveOpConverter.getHiveConf());
            if (error != null) {
                throw new SemanticException(error);
            }
        }
        // 1.a. Extract order for each column from collation
        // Generate sortCols and order
        ImmutableBitSet.Builder sortColsPosBuilder = ImmutableBitSet.builder();
        ImmutableBitSet.Builder sortOutputColsPosBuilder = ImmutableBitSet.builder();
        Map<Integer, RexNode> obRefToCallMap = sortRel.getInputRefToCallMap();
        List<ExprNodeDesc> sortCols = new ArrayList<ExprNodeDesc>();
        StringBuilder order = new StringBuilder();
        StringBuilder nullOrder = new StringBuilder();
        for (RelFieldCollation sortInfo : sortRel.getCollation().getFieldCollations()) {
            int sortColumnPos = sortInfo.getFieldIndex();
            ColumnInfo columnInfo = new ColumnInfo(inputOp.getSchema().getSignature().get(sortColumnPos));
            ExprNodeColumnDesc sortColumn = new ExprNodeColumnDesc(columnInfo.getType(), columnInfo.getInternalName(), columnInfo.getTabAlias(), columnInfo.getIsVirtualCol());
            sortCols.add(sortColumn);
            if (sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING) {
                order.append("-");
            } else {
                order.append("+");
            }
            if (sortInfo.nullDirection == RelFieldCollation.NullDirection.FIRST) {
                nullOrder.append("a");
            } else if (sortInfo.nullDirection == RelFieldCollation.NullDirection.LAST) {
                nullOrder.append("z");
            } else {
                // Default
                nullOrder.append(sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING ? "z" : "a");
            }
            if (obRefToCallMap != null) {
                RexNode obExpr = obRefToCallMap.get(sortColumnPos);
                sortColsPosBuilder.set(sortColumnPos);
                if (obExpr == null) {
                    sortOutputColsPosBuilder.set(sortColumnPos);
                }
            }
        }
        // Use only 1 reducer for order by
        int numReducers = 1;
        // We keep only the columns that are part of the final output
        List<String> keepColumns = new ArrayList<String>();
        final ImmutableBitSet sortColsPos = sortColsPosBuilder.build();
        final ImmutableBitSet sortOutputColsPos = sortOutputColsPosBuilder.build();
        final List<ColumnInfo> inputSchema = inputOp.getSchema().getSignature();
        for (int pos = 0; pos < inputSchema.size(); pos++) {
            if ((sortColsPos.get(pos) && sortOutputColsPos.get(pos)) || (!sortColsPos.get(pos) && !sortOutputColsPos.get(pos))) {
                keepColumns.add(inputSchema.get(pos).getInternalName());
            }
        }
        // 1.b. Generate reduce sink and project operator
        resultOp = HiveOpConverterUtils.genReduceSinkAndBacktrackSelect(resultOp, sortCols.toArray(new ExprNodeDesc[sortCols.size()]), 0, new ArrayList<ExprNodeDesc>(), order.toString(), nullOrder.toString(), numReducers, Operation.NOT_ACID, hiveOpConverter.getHiveConf(), keepColumns);
    }
    // 2. If we need to generate limit
    if (sortRel.fetch != null) {
        int limit = RexLiteral.intValue(sortRel.fetch);
        int offset = sortRel.offset == null ? 0 : RexLiteral.intValue(sortRel.offset);
        LimitDesc limitDesc = new LimitDesc(offset, limit);
        ArrayList<ColumnInfo> cinfoLst = HiveOpConverterUtils.createColInfos(resultOp);
        resultOp = OperatorFactory.getAndMakeChild(limitDesc, new RowSchema(cinfoLst), resultOp);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Generated " + resultOp + " with row schema: [" + resultOp.getSchema() + "]");
        }
    }
    // 3. Return result
    return inputOpAf.clone(resultOp);
}
Also used:
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)
ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)
ArrayList (java.util.ArrayList)
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)
OpAttr (org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr)
LimitDesc (org.apache.hadoop.hive.ql.plan.LimitDesc)
RelFieldCollation (org.apache.calcite.rel.RelFieldCollation)
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
RexNode (org.apache.calcite.rex.RexNode)
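
The heart of the translation above is the compact encoding Hive uses for sort keys: one character per key for direction ("+" ascending, "-" descending) and one for null ordering ("a" nulls first, "z" nulls last). Below is a minimal, self-contained sketch of that encoding, assuming only Calcite on the classpath; the class name and main method are illustrative, not part of Hive.

import java.util.Arrays;
import java.util.List;
import org.apache.calcite.rel.RelFieldCollation;

public class CollationEncodingSketch {
    public static void main(String[] args) {
        // Two sort keys: column 0 ASC NULLS FIRST, column 2 DESC with the
        // null direction unspecified, so it falls back to the default
        List<RelFieldCollation> collations = Arrays.asList(
            new RelFieldCollation(0, RelFieldCollation.Direction.ASCENDING,
                RelFieldCollation.NullDirection.FIRST),
            new RelFieldCollation(2, RelFieldCollation.Direction.DESCENDING,
                RelFieldCollation.NullDirection.UNSPECIFIED));
        StringBuilder order = new StringBuilder();
        StringBuilder nullOrder = new StringBuilder();
        for (RelFieldCollation c : collations) {
            boolean desc = c.getDirection() == RelFieldCollation.Direction.DESCENDING;
            order.append(desc ? "-" : "+");
            if (c.nullDirection == RelFieldCollation.NullDirection.FIRST) {
                nullOrder.append("a");
            } else if (c.nullDirection == RelFieldCollation.NullDirection.LAST) {
                nullOrder.append("z");
            } else {
                // Default mirrors the visitor: nulls first for ASC, last for DESC
                nullOrder.append(desc ? "z" : "a");
            }
        }
        System.out.println(order + " / " + nullOrder); // prints "+- / az"
    }
}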

Example 7 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

From the class JoinVisitor, method visit:

@Override
OpAttr visit(RelNode joinRel) throws SemanticException {
    // 0. Additional data structures needed for the join optimization
    // through Hive
    String[] baseSrc = new String[joinRel.getInputs().size()];
    String tabAlias = hiveOpConverter.getHiveDerivedTableAlias();
    // 1. Convert inputs
    OpAttr[] inputs = new OpAttr[joinRel.getInputs().size()];
    List<Operator<?>> children = new ArrayList<Operator<?>>(joinRel.getInputs().size());
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = hiveOpConverter.dispatch(joinRel.getInput(i));
        children.add(inputs[i].inputs.get(0));
        baseSrc[i] = inputs[i].tabAlias;
    }
    // 2. Generate tags
    for (int tag = 0; tag < children.size(); tag++) {
        ReduceSinkOperator reduceSinkOp = (ReduceSinkOperator) children.get(tag);
        reduceSinkOp.getConf().setTag(tag);
    }
    // 3. Virtual columns
    Set<Integer> newVcolsInCalcite = new HashSet<Integer>();
    newVcolsInCalcite.addAll(inputs[0].vcolsInCalcite);
    if (joinRel instanceof HiveMultiJoin || !((joinRel instanceof Join) && ((((Join) joinRel).isSemiJoin()) || (((Join) joinRel).getJoinType() == JoinRelType.ANTI)))) {
        int shift = inputs[0].inputs.get(0).getSchema().getSignature().size();
        for (int i = 1; i < inputs.length; i++) {
            newVcolsInCalcite.addAll(HiveCalciteUtil.shiftVColsSet(inputs[i].vcolsInCalcite, shift));
            shift += inputs[i].inputs.get(0).getSchema().getSignature().size();
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + joinRel.getId() + ":" + joinRel.getRelTypeName() + " with row type: [" + joinRel.getRowType() + "]");
    }
    // 4. Extract join key expressions from HiveSortExchange
    ExprNodeDesc[][] joinExpressions = new ExprNodeDesc[inputs.length][];
    for (int i = 0; i < inputs.length; i++) {
        joinExpressions[i] = ((HiveSortExchange) joinRel.getInput(i)).getKeyExpressions();
    }
    // 5. Extract rest of join predicate info. We infer the rest of join condition
    // that will be added to the filters (join conditions that are not part of
    // the join key)
    List<RexNode> joinFilters;
    if (joinRel instanceof HiveJoin) {
        joinFilters = ImmutableList.of(((HiveJoin) joinRel).getJoinFilter());
    } else if (joinRel instanceof HiveMultiJoin) {
        joinFilters = ((HiveMultiJoin) joinRel).getJoinFilters();
    } else if (joinRel instanceof HiveSemiJoin) {
        joinFilters = ImmutableList.of(((HiveSemiJoin) joinRel).getJoinFilter());
    } else if (joinRel instanceof HiveAntiJoin) {
        joinFilters = ImmutableList.of(((HiveAntiJoin) joinRel).getJoinFilter());
    } else {
        throw new SemanticException("Can't handle join type: " + joinRel.getClass().getName());
    }
    List<List<ExprNodeDesc>> filterExpressions = Lists.newArrayList();
    for (int i = 0; i < joinFilters.size(); i++) {
        List<ExprNodeDesc> filterExpressionsForInput = new ArrayList<ExprNodeDesc>();
        if (joinFilters.get(i) != null) {
            for (RexNode conj : RelOptUtil.conjunctions(joinFilters.get(i))) {
                ExprNodeDesc expr = HiveOpConverterUtils.convertToExprNode(conj, joinRel, null, newVcolsInCalcite);
                filterExpressionsForInput.add(expr);
            }
        }
        filterExpressions.add(filterExpressionsForInput);
    }
    // 6. Generate Join operator
    JoinOperator joinOp = genJoin(joinRel, joinExpressions, filterExpressions, children, baseSrc, tabAlias);
    // 7. Return result
    return new OpAttr(tabAlias, newVcolsInCalcite, joinOp);
}
Also used:
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)
Operator (org.apache.hadoop.hive.ql.exec.Operator)
HiveMultiJoin (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin)
ArrayList (java.util.ArrayList)
HiveAntiJoin (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin)
OpAttr (org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr)
HiveJoin (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin)
ImmutableList (com.google.common.collect.ImmutableList)
List (java.util.List)
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
HashSet (java.util.HashSet)
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
HiveSemiJoin (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin)
Join (org.apache.calcite.rel.core.Join)
RexNode (org.apache.calcite.rex.RexNode)
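
Step 3 in this visitor depends on HiveCalciteUtil.shiftVColsSet keeping virtual-column indices valid once the join concatenates its inputs' schemas: indices coming from input i must be offset by the combined column count of all inputs to its left. A small stand-in showing just the arithmetic (shiftVCols below is a hypothetical helper, not the actual Hive method):

import java.util.Set;
import java.util.stream.Collectors;

final class ShiftVColsSketch {
    // Hypothetical stand-in for HiveCalciteUtil.shiftVColsSet
    static Set<Integer> shiftVCols(Set<Integer> vcols, int shift) {
        return vcols.stream().map(pos -> pos + shift).collect(Collectors.toSet());
    }

    public static void main(String[] args) {
        // Input 0 contributes 3 columns, so a virtual column at index 1
        // of input 1 lands at index 4 in the joined row type.
        System.out.println(shiftVCols(Set.of(1), 3)); // prints [4]
    }
}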

Example 8 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

From the class HiveGBOpConvUtil, method genReduceGBRS:

private static OpAttr genReduceGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    GroupByOperator reduceSideGB1 = (GroupByOperator) inputOpAf.inputs.get(0);
    List<ColumnInfo> gb1ColInfoLst = reduceSideGB1.getSchema().getSignature();
    ArrayList<ExprNodeDesc> reduceKeys = getReduceKeysForRS(reduceSideGB1, 0, gbInfo.gbKeys.size() - 1, outputColumnNames, false, colInfoLst, colExprMap, true, true);
    if (inclGrpSetInReduceSide(gbInfo)) {
        addGrpSetCol(false, gb1ColInfoLst.get(reduceKeys.size()).getInternalName(), true, reduceKeys, outputColumnNames, colInfoLst, colExprMap);
    }
    ArrayList<ExprNodeDesc> reduceValues = getValueKeysForRS(reduceSideGB1, reduceSideGB1.getConf().getKeys().size(), outputColumnNames, colInfoLst, colExprMap, true, true);
    ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1, getNumPartFieldsForReduceSideRS(gbInfo), getParallelismForReduceSideRS(gbInfo), AcidUtils.Operation.NOT_ACID, gbInfo.defaultNullOrder), new RowSchema(colInfoLst), reduceSideGB1);
    rsOp.setColumnExprMap(colExprMap);
    return new OpAttr("", new HashSet<Integer>(), rsOp);
}
Also used:
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)
HashMap (java.util.HashMap)
ArrayList (java.util.ArrayList)
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)
OpAttr (org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr)
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
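
Every visitor in these examples traffics in the same small carrier object. Its declaration is not shown on this page, but the usage above (the tabAlias and vcolsInCalcite arguments, the inputs list, and the clone(...) call in Example 6) suggests a shape along the following lines. This is an inferred sketch, not the actual Hive declaration:

import java.util.List;
import java.util.Set;
import org.apache.hadoop.hive.ql.exec.Operator;

// Inferred sketch of HiveOpConverter.OpAttr, based only on the usage above
final class OpAttrSketch {
    final String tabAlias;               // alias of the derived table
    final Set<Integer> vcolsInCalcite;   // positions of Calcite virtual columns
    final List<Operator<?>> inputs;      // translated Hive operator(s)

    OpAttrSketch(String tabAlias, Set<Integer> vcolsInCalcite, Operator<?>... ops) {
        this.tabAlias = tabAlias;
        this.vcolsInCalcite = vcolsInCalcite;
        this.inputs = List.of(ops);
    }

    // clone(...) in the examples swaps in a new operator while keeping
    // the alias and virtual-column bookkeeping intact
    OpAttrSketch clone(Operator<?>... ops) {
        return new OpAttrSketch(tabAlias, vcolsInCalcite, ops);
    }
}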

Example 9 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

From the class HiveGBOpConvUtil, method genMapSideRS:

private static OpAttr genMapSideRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    List<String> outputKeyColumnNames = new ArrayList<String>();
    List<String> outputValueColumnNames = new ArrayList<String>();
    ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
    String outputColName;
    // 1. Add GB Keys to reduce keys
    ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
        // gbInfo already has ExprNode for gbkeys
        reduceKeys.add(gbInfo.gbKeys.get(i));
        String colOutputName = SemanticAnalyzer.getColumnInternalName(i);
        outputKeyColumnNames.add(colOutputName);
        colInfoLst.add(new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + colOutputName, gbInfo.gbKeyTypes.get(i), "", false));
        colExprMap.put(colOutputName, gbInfo.gbKeys.get(i));
    }
    // Note: GROUPING SETS are not allowed with map side aggregation set to false so we don't have to worry about it
    int keyLength = reduceKeys.size();
    // 2. Add Dist UDAF args to reduce keys
    if (gbInfo.containsDistinctAggr) {
        // TODO: Why is this needed (doesn't represent any cols)
        String udafName = SemanticAnalyzer.getColumnInternalName(reduceKeys.size());
        outputKeyColumnNames.add(udafName);
        for (int i = 0; i < gbInfo.distExprNodes.size(); i++) {
            reduceKeys.add(gbInfo.distExprNodes.get(i));
            // this part of reduceKeys is later used to create column names strictly for non-distinct aggregates
            // with parameters same as distinct keys which expects _col0 at the end. So we always append
            // _col0 at the end instead of _col<i>
            outputColName = SemanticAnalyzer.getColumnInternalName(0);
            String field = Utilities.ReduceField.KEY.toString() + "." + udafName + ":" + i + "." + outputColName;
            ColumnInfo colInfo = new ColumnInfo(field, gbInfo.distExprNodes.get(i).getTypeInfo(), null, false);
            colInfoLst.add(colInfo);
            colExprMap.put(field, gbInfo.distExprNodes.get(i));
        }
    }
    // 3. Add UDAF args deduped to reduce values
    ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
    for (int i = 0; i < gbInfo.deDupedNonDistIrefs.size(); i++) {
        reduceValues.add(gbInfo.deDupedNonDistIrefs.get(i));
        outputColName = SemanticAnalyzer.getColumnInternalName(reduceValues.size() - 1);
        outputValueColumnNames.add(outputColName);
        String field = Utilities.ReduceField.VALUE.toString() + "." + outputColName;
        colInfoLst.add(new ColumnInfo(field, reduceValues.get(reduceValues.size() - 1).getTypeInfo(), null, false));
        colExprMap.put(field, reduceValues.get(reduceValues.size() - 1));
    }
    // 4. Gen RS
    ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, keyLength, reduceValues, gbInfo.distColIndices, outputKeyColumnNames, outputValueColumnNames, true, -1, getNumPartFieldsForMapSideRS(gbInfo), getParallelismForMapSideRS(gbInfo), AcidUtils.Operation.NOT_ACID, gbInfo.defaultNullOrder), new RowSchema(colInfoLst), inputOpAf.inputs.get(0));
    rsOp.setColumnExprMap(colExprMap);
    return new OpAttr("", new HashSet<Integer>(), rsOp);
}
Also used:
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)
HashMap (java.util.HashMap)
ArrayList (java.util.ArrayList)
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)
OpAttr (org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr)
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
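
Steps 1 through 3 above lean on Hive's internal column-name convention: SemanticAnalyzer.getColumnInternalName(i) produces names of the form _col<i>, which are then qualified with a KEY. or VALUE. prefix depending on which side of the reduce sink the column travels. A small self-contained illustration; the helper below mimics the Hive method rather than calling it:

final class InternalNameSketch {
    // Mimics SemanticAnalyzer.getColumnInternalName: "_col" + position
    static String columnInternalName(int pos) {
        return "_col" + pos;
    }

    public static void main(String[] args) {
        // Reduce keys are addressed as KEY._colN, values as VALUE._colN
        String key = "KEY." + columnInternalName(0);     // KEY._col0
        String value = "VALUE." + columnInternalName(1); // VALUE._col1
        System.out.println(key + ", " + value);
    }
}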

Example 10 with OpAttr

Use of org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr in project hive by apache.

From the class HiveGBOpConvUtil, method translateGB:

static OpAttr translateGB(OpAttr inputOpAf, HiveAggregate aggRel, HiveConf hc) throws SemanticException {
    OpAttr translatedGBOpAttr = null;
    GBInfo gbInfo = getGBInfo(aggRel, inputOpAf, hc);
    switch(gbInfo.gbPhysicalPipelineMode) {
        case MAP_SIDE_GB_NO_SKEW_NO_ADD_MR_JOB:
            translatedGBOpAttr = genMapSideGBNoSkewNoAddMRJob(inputOpAf, aggRel, gbInfo);
            break;
        case MAP_SIDE_GB_NO_SKEW_ADD_MR_JOB:
            translatedGBOpAttr = genMapSideGBNoSkewAddMRJob(inputOpAf, aggRel, gbInfo);
            break;
        case MAP_SIDE_GB_SKEW_GBKEYS_OR_DIST_UDAF_PRESENT:
            translatedGBOpAttr = genMapSideGBSkewGBKeysOrDistUDAFPresent(inputOpAf, aggRel, gbInfo);
            break;
        case MAP_SIDE_GB_SKEW_GBKEYS_AND_DIST_UDAF_NOT_PRESENT:
            translatedGBOpAttr = genMapSideGBSkewGBKeysAndDistUDAFNotPresent(inputOpAf, aggRel, gbInfo);
            break;
        case NO_MAP_SIDE_GB_NO_SKEW:
            translatedGBOpAttr = genNoMapSideGBNoSkew(inputOpAf, aggRel, gbInfo);
            break;
        case NO_MAP_SIDE_GB_SKEW:
            translatedGBOpAttr = genNoMapSideGBSkew(inputOpAf, aggRel, gbInfo);
            break;
    }
    return translatedGBOpAttr;
}
Also used: OpAttr (org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr)
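
One design detail worth noting: the switch enumerates every gbPhysicalPipelineMode case and has no default branch, so translatedGBOpAttr stays null if an unknown mode ever appears. A defensive variant would throw a SemanticException from a default case rather than return null to the caller.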

Aggregations

OpAttr (org.apache.hadoop.hive.ql.optimizer.calcite.translator.opconventer.HiveOpConverter.OpAttr): 21
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 13
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 13
ArrayList (java.util.ArrayList): 12
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 12
HashMap (java.util.HashMap): 9
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 9
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 6
Operator (org.apache.hadoop.hive.ql.exec.Operator): 6
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 5
HashSet (java.util.HashSet): 4
GenericUDAFInfo (org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.GenericUDAFInfo): 4
AggregationDesc (org.apache.hadoop.hive.ql.plan.AggregationDesc): 4
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 4
GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc): 4
Mode (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode): 4
ExprNodeConverter (org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter): 3
ImmutableList (com.google.common.collect.ImmutableList): 2
List (java.util.List): 2
RexNode (org.apache.calcite.rex.RexNode): 2