Example 1 with LimitDesc

Use of org.apache.hadoop.hive.ql.plan.LimitDesc in project hive by apache.

From the class TestVectorLimitOperator, method validateVectorLimitOperator.

private void validateVectorLimitOperator(int limit, int batchSize, int expectedBatchSize) throws HiveException {
    @SuppressWarnings("unchecked")
    FakeVectorRowBatchFromObjectIterables frboi = new FakeVectorRowBatchFromObjectIterables(
        batchSize,
        new String[] { "tinyint", "double" },
        Arrays.asList(new Object[] { 1, 2, 3, 4 }),
        Arrays.asList(new Object[] { 323.0, 34.5, null, 89.3 }));
    // Get next batch
    VectorizedRowBatch vrb = frboi.produceNextBatch();
    // Create limit desc with limit value
    LimitDesc ld = new LimitDesc(limit);
    VectorLimitOperator lo = new VectorLimitOperator(new CompilationOpContext(), null, ld);
    lo.initialize(new Configuration(), null);
    // Process the batch
    lo.process(vrb, 0);
    // Verify batch size
    Assert.assertEquals(vrb.size, expectedBatchSize);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) FakeVectorRowBatchFromObjectIterables(org.apache.hadoop.hive.ql.exec.vector.util.FakeVectorRowBatchFromObjectIterables) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc)
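
To see how this helper might be exercised, here is a minimal sketch of two JUnit tests calling it with a limit below and above the produced batch size. The method names and argument values are illustrative assumptions, not necessarily the actual tests in TestVectorLimitOperator.

@Test
public void testLimitLessThanBatchSize() throws HiveException {
    // Hypothetical: the fake batch holds 4 rows, so a limit of 2 truncates vrb.size to 2.
    validateVectorLimitOperator(2, 5, 2);
}

@Test
public void testLimitGreaterThanBatchSize() throws HiveException {
    // Hypothetical: a limit of 100 exceeds the 3-row batch, so the batch passes through unchanged.
    validateVectorLimitOperator(100, 3, 3);
}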

Example 2 with LimitDesc

Use of org.apache.hadoop.hive.ql.plan.LimitDesc in project hive by apache.

From the class GlobalLimitOptimizer, method transform.

@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    Context ctx = pctx.getContext();
    Map<String, TableScanOperator> topOps = pctx.getTopOps();
    GlobalLimitCtx globalLimitCtx = pctx.getGlobalLimitCtx();
    Map<String, SplitSample> nameToSplitSample = pctx.getNameToSplitSample();
    // Determine whether the query qualifies for reducing the LIMIT input size:
    // only on the first try, with a single top (table scan) operator, and with
    // no transform/UDTF and no block sampling used.
    if (ctx.getTryCount() == 0 && topOps.size() == 1 && !globalLimitCtx.ifHasTransformOrUDTF() && nameToSplitSample.isEmpty()) {
        // Here we recursively check:
        // 1. whether there is exactly one LIMIT in the query
        // 2. whether there is no aggregation, group-by, distinct, sort by,
        //    distribute by, or table sampling in any of the sub-queries.
        // The query only qualifies if both conditions are satisfied.
        //
        // Example qualified queries:
        //    CREATE TABLE ... AS SELECT col1, col2 FROM tbl LIMIT ..
        //    INSERT OVERWRITE TABLE ... SELECT col1, hash(col2), split(col1)
        //                               FROM ... LIMIT...
        //    SELECT * FROM (SELECT col1 AS col2 FROM (SELECT * FROM ...) t1 LIMIT ...) t2;
        //
        TableScanOperator ts = topOps.values().iterator().next();
        LimitOperator tempGlobalLimit = checkQbpForGlobalLimit(ts);
        // the query qualifies for the optimization
        if (tempGlobalLimit != null) {
            LimitDesc tempGlobalLimitDesc = tempGlobalLimit.getConf();
            Table tab = ts.getConf().getTableMetadata();
            Set<FilterOperator> filterOps = OperatorUtils.findOperators(ts, FilterOperator.class);
            if (!tab.isPartitioned()) {
                if (filterOps.size() == 0) {
                    Integer tempOffset = tempGlobalLimitDesc.getOffset();
                    globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(), (tempOffset == null) ? 0 : tempOffset);
                }
            } else {
                // check if the pruner only contains partition columns
                if (onlyContainsPartnCols(tab, filterOps)) {
                    String alias = (String) topOps.keySet().toArray()[0];
                    PrunedPartitionList partsList = pctx.getPrunedPartitions(alias, ts);
                    // Only enable the optimization if the pruner resolved every partition;
                    // with unknown partitions the filter is still needed to prune correctly.
                    if (!partsList.hasUnknownPartitions()) {
                        Integer tempOffset = tempGlobalLimitDesc.getOffset();
                        globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(), (tempOffset == null) ? 0 : tempOffset);
                    }
                }
            }
            if (globalLimitCtx.isEnable()) {
                LOG.info("Qualify the optimize that reduces input size for 'offset' for offset " + globalLimitCtx.getGlobalOffset());
                LOG.info("Qualify the optimize that reduces input size for 'limit' for limit " + globalLimitCtx.getGlobalLimit());
            }
        }
    }
    return pctx;
}
Also used : Context(org.apache.hadoop.hive.ql.Context) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table) SplitSample(org.apache.hadoop.hive.ql.parse.SplitSample) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) GlobalLimitCtx(org.apache.hadoop.hive.ql.parse.GlobalLimitCtx)
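
As a rough usage sketch (assumed, not taken from the project), a caller could run the optimizer and then inspect the GlobalLimitCtx to check whether the shortcut was enabled; the no-arg constructor and the LOG instance here are assumptions.

// Hedged sketch built only on the accessors that appear in the code above.
ParseContext optimized = new GlobalLimitOptimizer().transform(pctx);
GlobalLimitCtx limitCtx = optimized.getGlobalLimitCtx();
if (limitCtx.isEnable()) {
    // Downstream planning can now restrict how much input is read for this query.
    LOG.info("Global limit optimization enabled: limit=" + limitCtx.getGlobalLimit()
        + ", offset=" + limitCtx.getGlobalOffset());
}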

Example 3 with LimitDesc

Use of org.apache.hadoop.hive.ql.plan.LimitDesc in project hive by apache.

From the class SemanticAnalyzer, method genLimitPlan.

@SuppressWarnings("nls")
private Operator genLimitPlan(String dest, QB qb, Operator input, int offset, int limit) throws SemanticException {
    // A map-only job can be optimized - instead of converting it to a
    // map-reduce job, we can have another map job to do the same to avoid
    // the cost of sorting in the map-reduce phase. A better approach would
    // be to write into a local file and then have a map-only job.
    // Add the limit operator to get the value fields
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    LimitDesc limitDesc = new LimitDesc(offset, limit);
    globalLimitCtx.setLastReduceLimitDesc(limitDesc);
    Operator limitMap = putOpInsertMap(OperatorFactory.getAndMakeChild(limitDesc, new RowSchema(inputRR.getColumnInfos()), input), inputRR);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Created LimitOperator Plan for clause: " + dest + " row schema: " + inputRR.toString());
    }
    return limitMap;
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc)
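
Taken together with Example 1, these snippets use two LimitDesc constructors; a minimal sketch of both, with arbitrary values, is shown below (based only on the constructors that appear in these examples).

LimitDesc limitOnly = new LimitDesc(10);          // limit only, e.g. LIMIT 10
LimitDesc limitWithOffset = new LimitDesc(5, 10); // offset first, then limit, e.g. LIMIT 10 OFFSET 5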

Example 4 with LimitDesc

Use of org.apache.hadoop.hive.ql.plan.LimitDesc in project hive by apache.

From the class Vectorizer, method vectorizeOperator.

public Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op, VectorizationContext vContext, boolean isTezOrSpark, VectorTaskColumnInfo vectorTaskColumnInfo) throws HiveException {
    Operator<? extends OperatorDesc> vectorOp = null;
    boolean isNative;
    switch(op.getType()) {
        case TABLESCAN:
            vectorOp = vectorizeTableScanOperator(op, vContext);
            isNative = true;
            break;
        case MAPJOIN:
            {
                if (op instanceof MapJoinOperator) {
                    VectorMapJoinInfo vectorMapJoinInfo = new VectorMapJoinInfo();
                    MapJoinDesc desc = (MapJoinDesc) op.getConf();
                    boolean specialize = canSpecializeMapJoin(op, desc, isTezOrSpark, vContext, vectorMapJoinInfo);
                    if (!specialize) {
                        Class<? extends Operator<?>> opClass = null;
                        // *NON-NATIVE* vector map differences for LEFT OUTER JOIN and Filtered...
                        List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
                        boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0);
                        if (!isOuterAndFiltered) {
                            opClass = VectorMapJoinOperator.class;
                        } else {
                            opClass = VectorMapJoinOuterFilteredOperator.class;
                        }
                        vectorOp = OperatorFactory.getVectorOperator(opClass, op.getCompilationOpContext(), op.getConf(), vContext);
                        isNative = false;
                    } else {
                        // TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
                        // HiveConf.setBoolVar(physicalContext.getConf(),
                        //    HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
                        vectorOp = specializeMapJoinOperator(op, vContext, desc, vectorMapJoinInfo);
                        isNative = true;
                        if (vectorTaskColumnInfo != null) {
                            if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableKeyExpressions())) {
                                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                            }
                            if (usesVectorUDFAdaptor(vectorMapJoinInfo.getBigTableValueExpressions())) {
                                vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                            }
                        }
                    }
                } else {
                    Preconditions.checkState(op instanceof SMBMapJoinOperator);
                    SMBJoinDesc smbJoinSinkDesc = (SMBJoinDesc) op.getConf();
                    VectorSMBJoinDesc vectorSMBJoinDesc = new VectorSMBJoinDesc();
                    smbJoinSinkDesc.setVectorDesc(vectorSMBJoinDesc);
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), smbJoinSinkDesc, vContext);
                    isNative = false;
                }
            }
            break;
        case REDUCESINK:
            {
                VectorReduceSinkInfo vectorReduceSinkInfo = new VectorReduceSinkInfo();
                ReduceSinkDesc desc = (ReduceSinkDesc) op.getConf();
                boolean specialize = canSpecializeReduceSink(desc, isTezOrSpark, vContext, vectorReduceSinkInfo);
                if (!specialize) {
                    vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), op.getConf(), vContext);
                    isNative = false;
                } else {
                    vectorOp = specializeReduceSinkOperator(op, vContext, desc, vectorReduceSinkInfo);
                    isNative = true;
                    if (vectorTaskColumnInfo != null) {
                        if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkKeyExpressions())) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                        if (usesVectorUDFAdaptor(vectorReduceSinkInfo.getReduceSinkValueExpressions())) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                    }
                }
            }
            break;
        case FILTER:
            {
                vectorOp = vectorizeFilterOperator(op, vContext);
                isNative = true;
                if (vectorTaskColumnInfo != null) {
                    VectorFilterDesc vectorFilterDesc = (VectorFilterDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
                    VectorExpression vectorPredicateExpr = vectorFilterDesc.getPredicateExpression();
                    if (usesVectorUDFAdaptor(vectorPredicateExpr)) {
                        vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                    }
                }
            }
            break;
        case SELECT:
            {
                vectorOp = vectorizeSelectOperator(op, vContext);
                isNative = true;
                if (vectorTaskColumnInfo != null) {
                    VectorSelectDesc vectorSelectDesc = (VectorSelectDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
                    VectorExpression[] vectorSelectExprs = vectorSelectDesc.getSelectExpressions();
                    if (usesVectorUDFAdaptor(vectorSelectExprs)) {
                        vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                    }
                }
            }
            break;
        case GROUPBY:
            {
                vectorOp = vectorizeGroupByOperator(op, vContext);
                isNative = false;
                if (vectorTaskColumnInfo != null) {
                    VectorGroupByDesc vectorGroupByDesc = (VectorGroupByDesc) ((AbstractOperatorDesc) vectorOp.getConf()).getVectorDesc();
                    if (!vectorGroupByDesc.isVectorOutput()) {
                        vectorTaskColumnInfo.setGroupByVectorOutput(false);
                    }
                    VectorExpression[] vecKeyExpressions = vectorGroupByDesc.getKeyExpressions();
                    if (usesVectorUDFAdaptor(vecKeyExpressions)) {
                        vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                    }
                    VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators();
                    for (VectorAggregateExpression vecAggr : vecAggregators) {
                        if (usesVectorUDFAdaptor(vecAggr.inputExpression())) {
                            vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true);
                        }
                    }
                }
            }
            break;
        case FILESINK:
            {
                FileSinkDesc fileSinkDesc = (FileSinkDesc) op.getConf();
                VectorFileSinkDesc vectorFileSinkDesc = new VectorFileSinkDesc();
                fileSinkDesc.setVectorDesc(vectorFileSinkDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), fileSinkDesc, vContext);
                isNative = false;
            }
            break;
        case LIMIT:
            {
                LimitDesc limitDesc = (LimitDesc) op.getConf();
                VectorLimitDesc vectorLimitDesc = new VectorLimitDesc();
                limitDesc.setVectorDesc(vectorLimitDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), limitDesc, vContext);
                isNative = true;
            }
            break;
        case EVENT:
            {
                AppMasterEventDesc eventDesc = (AppMasterEventDesc) op.getConf();
                VectorAppMasterEventDesc vectorEventDesc = new VectorAppMasterEventDesc();
                eventDesc.setVectorDesc(vectorEventDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), eventDesc, vContext);
                isNative = true;
            }
            break;
        case HASHTABLESINK:
            {
                SparkHashTableSinkDesc sparkHashTableSinkDesc = (SparkHashTableSinkDesc) op.getConf();
                VectorSparkHashTableSinkDesc vectorSparkHashTableSinkDesc = new VectorSparkHashTableSinkDesc();
                sparkHashTableSinkDesc.setVectorDesc(vectorSparkHashTableSinkDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkHashTableSinkDesc, vContext);
                isNative = true;
            }
            break;
        case SPARKPRUNINGSINK:
            {
                SparkPartitionPruningSinkDesc sparkPartitionPruningSinkDesc = (SparkPartitionPruningSinkDesc) op.getConf();
                VectorSparkPartitionPruningSinkDesc vectorSparkPartitionPruningSinkDesc = new VectorSparkPartitionPruningSinkDesc();
                sparkPartitionPruningSinkDesc.setVectorDesc(vectorSparkPartitionPruningSinkDesc);
                vectorOp = OperatorFactory.getVectorOperator(op.getCompilationOpContext(), sparkPartitionPruningSinkDesc, vContext);
                isNative = true;
            }
            break;
        default:
            // These are children of GROUP BY operators with non-vector outputs.
            isNative = false;
            vectorOp = op;
            break;
    }
    Preconditions.checkState(vectorOp != null);
    if (vectorTaskColumnInfo != null && !isNative) {
        vectorTaskColumnInfo.setAllNative(false);
    }
    LOG.debug("vectorizeOperator " + vectorOp.getClass().getName());
    LOG.debug("vectorizeOperator " + vectorOp.getConf().getClass().getName());
    if (vectorOp != op) {
        fixupParentChildOperators(op, vectorOp);
        ((AbstractOperatorDesc) vectorOp.getConf()).setVectorMode(true);
    }
    return vectorOp;
}
Also used : VectorMapJoinInnerStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerStringOperator) VectorReduceSinkLongOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator) VectorMapJoinOuterLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator) VectorReduceSinkStringOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator) VectorMapJoinInnerBigOnlyMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyMultiKeyOperator) VectorMapJoinLeftSemiMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiMultiKeyOperator) VectorMapJoinLeftSemiStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiStringOperator) VectorMapJoinLeftSemiLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiLongOperator) VectorReduceSinkMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) VectorMapJoinInnerBigOnlyLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator) VectorMapJoinInnerBigOnlyStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyStringOperator) VectorMapJoinInnerMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerMultiKeyOperator) VectorMapJoinOuterStringOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorMapJoinInnerLongOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerLongOperator) VectorMapJoinOuterMultiKeyOperator(org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator) AppMasterEventDesc(org.apache.hadoop.hive.ql.plan.AppMasterEventDesc) VectorAppMasterEventDesc(org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc) SMBJoinDesc(org.apache.hadoop.hive.ql.plan.SMBJoinDesc) VectorSMBJoinDesc(org.apache.hadoop.hive.ql.plan.VectorSMBJoinDesc) VectorFileSinkDesc(org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorFileSinkDesc(org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc) VectorReduceSinkInfo(org.apache.hadoop.hive.ql.plan.VectorReduceSinkInfo) VectorSparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkPartitionPruningSinkDesc) SparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc) VectorAppMasterEventDesc(org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc) ArrayList(java.util.ArrayList) List(java.util.List) VectorSelectDesc(org.apache.hadoop.hive.ql.plan.VectorSelectDesc) VectorReduceSinkDesc(org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) VectorMapJoinOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator) VectorFilterDesc(org.apache.hadoop.hive.ql.plan.VectorFilterDesc) SparkHashTableSinkDesc(org.apache.hadoop.hive.ql.plan.SparkHashTableSinkDesc) VectorSparkHashTableSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc) AbstractOperatorDesc(org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc) 
MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) VectorMapJoinDesc(org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc) VectorSparkHashTableSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc) VectorSparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.plan.VectorSparkPartitionPruningSinkDesc) VectorMapJoinInfo(org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo) VectorSMBJoinDesc(org.apache.hadoop.hive.ql.plan.VectorSMBJoinDesc) VectorAggregateExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression) VectorLimitDesc(org.apache.hadoop.hive.ql.plan.VectorLimitDesc) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc) VectorLimitDesc(org.apache.hadoop.hive.ql.plan.VectorLimitDesc) VectorMapJoinOuterFilteredOperator(org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)
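
Most branches in the switch above follow the same three-step shape; distilled for the LIMIT case as a restatement of the code already shown (not new project code):

// 1. Take the logical descriptor from the operator being vectorized.
LimitDesc limitDesc = (LimitDesc) op.getConf();
// 2. Attach the matching vector descriptor.
limitDesc.setVectorDesc(new VectorLimitDesc());
// 3. Ask the factory for the vectorized operator, reusing the compilation context.
Operator<? extends OperatorDesc> vectorLimit =
    OperatorFactory.getVectorOperator(op.getCompilationOpContext(), limitDesc, vContext);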

Example 5 with LimitDesc

Use of org.apache.hadoop.hive.ql.plan.LimitDesc in project hive by apache.

From the class HiveOpConverter, method visit.

OpAttr visit(HiveSortLimit sortRel) throws SemanticException {
    OpAttr inputOpAf = dispatch(sortRel.getInput());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " with row type: [" + sortRel.getRowType() + "]");
        if (sortRel.getCollation() == RelCollations.EMPTY) {
            LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of limit");
        } else if (sortRel.fetch == null) {
            LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of sort");
        } else {
            LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of sort+limit");
        }
    }
    Operator<?> inputOp = inputOpAf.inputs.get(0);
    Operator<?> resultOp = inputOpAf.inputs.get(0);
    // 1. If we need to sort tuples based on the value of some of their columns
    if (sortRel.getCollation() != RelCollations.EMPTY) {
        // In strict mode, in the presence of order by, limit must be specified.
        if (sortRel.fetch == null) {
            String error = StrictChecks.checkNoLimit(hiveConf);
            if (error != null)
                throw new SemanticException(error);
        }
        // 1.a. Extract order for each column from collation
        // Generate sortCols and order
        ImmutableBitSet.Builder sortColsPosBuilder = ImmutableBitSet.builder();
        ImmutableBitSet.Builder sortOutputColsPosBuilder = ImmutableBitSet.builder();
        Map<Integer, RexNode> obRefToCallMap = sortRel.getInputRefToCallMap();
        List<ExprNodeDesc> sortCols = new ArrayList<ExprNodeDesc>();
        StringBuilder order = new StringBuilder();
        StringBuilder nullOrder = new StringBuilder();
        for (RelFieldCollation sortInfo : sortRel.getCollation().getFieldCollations()) {
            int sortColumnPos = sortInfo.getFieldIndex();
            ColumnInfo columnInfo = new ColumnInfo(inputOp.getSchema().getSignature().get(sortColumnPos));
            ExprNodeColumnDesc sortColumn = new ExprNodeColumnDesc(columnInfo.getType(), columnInfo.getInternalName(), columnInfo.getTabAlias(), columnInfo.getIsVirtualCol());
            sortCols.add(sortColumn);
            if (sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING) {
                order.append("-");
            } else {
                order.append("+");
            }
            if (sortInfo.nullDirection == RelFieldCollation.NullDirection.FIRST) {
                nullOrder.append("a");
            } else if (sortInfo.nullDirection == RelFieldCollation.NullDirection.LAST) {
                nullOrder.append("z");
            } else {
                // Default
                nullOrder.append(sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING ? "z" : "a");
            }
            if (obRefToCallMap != null) {
                RexNode obExpr = obRefToCallMap.get(sortColumnPos);
                sortColsPosBuilder.set(sortColumnPos);
                if (obExpr == null) {
                    sortOutputColsPosBuilder.set(sortColumnPos);
                }
            }
        }
        // Use only 1 reducer for order by
        int numReducers = 1;
        // Keep only the columns that are part of the final output
        List<String> keepColumns = new ArrayList<String>();
        final ImmutableBitSet sortColsPos = sortColsPosBuilder.build();
        final ImmutableBitSet sortOutputColsPos = sortOutputColsPosBuilder.build();
        final ArrayList<ColumnInfo> inputSchema = inputOp.getSchema().getSignature();
        for (int pos = 0; pos < inputSchema.size(); pos++) {
            if ((sortColsPos.get(pos) && sortOutputColsPos.get(pos)) || (!sortColsPos.get(pos) && !sortOutputColsPos.get(pos))) {
                keepColumns.add(inputSchema.get(pos).getInternalName());
            }
        }
        // 1.b. Generate reduce sink and project operator
        resultOp = genReduceSinkAndBacktrackSelect(resultOp, sortCols.toArray(new ExprNodeDesc[sortCols.size()]), 0, new ArrayList<ExprNodeDesc>(), order.toString(), nullOrder.toString(), numReducers, Operation.NOT_ACID, hiveConf, keepColumns);
    }
    // 2. If we need to generate limit
    if (sortRel.fetch != null) {
        int limit = RexLiteral.intValue(sortRel.fetch);
        int offset = sortRel.offset == null ? 0 : RexLiteral.intValue(sortRel.offset);
        LimitDesc limitDesc = new LimitDesc(offset, limit);
        ArrayList<ColumnInfo> cinfoLst = createColInfos(resultOp);
        resultOp = OperatorFactory.getAndMakeChild(limitDesc, new RowSchema(cinfoLst), resultOp);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Generated " + resultOp + " with row schema: [" + resultOp.getSchema() + "]");
        }
    }
    // 3. Return result
    return inputOpAf.clone(resultOp);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc) RelFieldCollation(org.apache.calcite.rel.RelFieldCollation) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) RexNode(org.apache.calcite.rex.RexNode)
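
The order and nullOrder strings built in the sort loop encode one character per sort key. A small worked example, for a hypothetical ORDER BY c1 ASC NULLS FIRST, c2 DESC NULLS LAST:

String order = "+-";     // '+' = ascending, '-' = descending
String nullOrder = "az"; // 'a' = nulls sort first, 'z' = nulls sort last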

Aggregations

LimitDesc (org.apache.hadoop.hive.ql.plan.LimitDesc): 5
ArrayList (java.util.ArrayList): 2
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 2
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 2
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 2
List (java.util.List): 1
RelFieldCollation (org.apache.calcite.rel.RelFieldCollation): 1
RexNode (org.apache.calcite.rex.RexNode): 1
ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet): 1
Configuration (org.apache.hadoop.conf.Configuration): 1
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 1
Context (org.apache.hadoop.hive.ql.Context): 1
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 1
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 1
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 1
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 1
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 1
LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator): 1
Operator (org.apache.hadoop.hive.ql.exec.Operator): 1
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 1