Example 1 with GlobalLimitCtx

Use of org.apache.hadoop.hive.ql.parse.GlobalLimitCtx in project hive by apache.

The class GlobalLimitOptimizer, method transform. This pass decides whether the query qualifies for the global-limit optimization; when it does, the limit and offset are recorded in GlobalLimitCtx so that later stages can reduce the amount of input read.

@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    Context ctx = pctx.getContext();
    Map<String, TableScanOperator> topOps = pctx.getTopOps();
    GlobalLimitCtx globalLimitCtx = pctx.getGlobalLimitCtx();
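    // GlobalLimitCtx carries the outcome of this pass: enableOpt(limit, offset)
    // below marks the query as qualifying so the input size can be reduced.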
    Map<String, SplitSample> nameToSplitSample = pctx.getNameToSplitSample();
    // Determine whether the query qualifies for reducing the input size for
    // LIMIT: only when there is exactly one top operator, no transform or
    // UDTF, and no block sampling is used.
    if (topOps.size() == 1 && !globalLimitCtx.ifHasTransformOrUDTF() && nameToSplitSample.isEmpty()) {
        // Here we recursively check:
        // 1. whether there is exactly one LIMIT in the query
        // 2. whether there is no aggregation, group-by, distinct, sort by,
        // distribute by, or table sampling in any of the sub-queries.
        // The query only qualifies if both conditions are satisfied.
        // 
        // Example qualified queries:
        // CREATE TABLE ... AS SELECT col1, col2 FROM tbl LIMIT ..
        // INSERT OVERWRITE TABLE ... SELECT col1, hash(col2), split(col1)
        // FROM ... LIMIT...
        // SELECT * FROM (SELECT col1 as col2 (SELECT * FROM ...) t1 LIMIT ...) t2);
        // 
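        // The size check above guarantees a single entry, so this yields the
        // query's only TableScanOperator.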
        TableScanOperator ts = topOps.values().iterator().next();
        Table tab = ts.getConf().getTableMetadata();
        if (tab.isNonNative()) {
            LOG.info("Not enabling limit optimization on non native table: " + tab.getTableName());
            return pctx;
        }
        // InputFormat.getSplits won't be called if there is no input path & the TS Vertex will have 0 task parallelism
        if (tab.getStorageHandler() == null) {
            LimitOperator tempGlobalLimit = checkQbpForGlobalLimit(ts);
            // query qualify for the optimization
            if (tempGlobalLimit != null) {
                LimitDesc tempGlobalLimitDesc = tempGlobalLimit.getConf();
                Set<FilterOperator> filterOps = OperatorUtils.findOperators(ts, FilterOperator.class);
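                // Two qualification paths: an unpartitioned table must have no
                // filters at all, while a partitioned table may keep filters that
                // touch only partition columns, since partition pruning then
                // fully determines which data is read.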
                if (!tab.isPartitioned()) {
                    if (filterOps.size() == 0) {
                        Integer tempOffset = tempGlobalLimitDesc.getOffset();
                        globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(), (tempOffset == null) ? 0 : tempOffset);
                    }
                } else {
                    // check if the pruner only contains partition columns
                    if (onlyContainsPartnCols(tab, filterOps)) {
                        String alias = (String) topOps.keySet().toArray()[0];
                        PrunedPartitionList partsList = pctx.getPrunedPartitions(alias, ts);
                        // Only enable the optimization when there are no unknown
                        // partitions; otherwise the pruner cannot evaluate
                        // the filter to prune correctly.
                        if (!partsList.hasUnknownPartitions()) {
                            Integer tempOffset = tempGlobalLimitDesc.getOffset();
                            globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(), (tempOffset == null) ? 0 : tempOffset);
                        }
                    }
                }
                if (globalLimitCtx.isEnable()) {
                    LOG.info("Qualify the optimize that reduces input size for 'offset' for offset " + globalLimitCtx.getGlobalOffset());
                    LOG.info("Qualify the optimize that reduces input size for 'limit' for limit " + globalLimitCtx.getGlobalLimit());
                }
            }
        }
    }
    return pctx;
}
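
The helper checkQbpForGlobalLimit called above is not shown on this page. As a rough, hypothetical sketch of what such a check has to establish (the name findSoleLimit and the plain DAG walk are illustrative assumptions, not Hive's actual implementation), the branch below a TableScanOperator qualifies only if it contains exactly one LimitOperator and nothing that can change row counts:

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.LimitOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;

// Hypothetical sketch, not Hive's checkQbpForGlobalLimit: walk the operator
// DAG under the TableScanOperator and return the single LimitOperator, or
// null if the branch does not qualify. GroupByOperator stands in here for the
// aggregation/group-by/distinct cases listed in the comments above.
static LimitOperator findSoleLimit(TableScanOperator ts) {
    LimitOperator found = null;
    Set<Operator<?>> visited = new HashSet<>();
    Deque<Operator<?>> pending = new ArrayDeque<>();
    pending.push(ts);
    while (!pending.isEmpty()) {
        Operator<?> op = pending.pop();
        if (!visited.add(op)) {
            continue; // already examined via another path through the DAG
        }
        if (op instanceof LimitOperator) {
            if (found != null) {
                return null; // more than one LIMIT: does not qualify
            }
            found = (LimitOperator) op;
        } else if (op instanceof GroupByOperator) {
            return null; // aggregation can change row counts: does not qualify
        }
        if (op.getChildOperators() != null) {
            for (Operator<?> child : op.getChildOperators()) {
                pending.push(child);
            }
        }
    }
    return found;
}

Returning the operator rather than a boolean matches the caller above, which still needs the LimitDesc to read both the limit and the (possibly null) offset before calling enableOpt.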
Also used:

Context (org.apache.hadoop.hive.ql.Context)
ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext)
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)
Table (org.apache.hadoop.hive.ql.metadata.Table)
SplitSample (org.apache.hadoop.hive.ql.parse.SplitSample)
LimitDesc (org.apache.hadoop.hive.ql.plan.LimitDesc)
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)
PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList)
LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator)
GlobalLimitCtx (org.apache.hadoop.hive.ql.parse.GlobalLimitCtx)
