
Example 6 with LimitOperator

Use of org.apache.hadoop.hive.ql.exec.LimitOperator in project hive by apache.

Source: the genLimitPlan method of the class SemanticAnalyzer.

@SuppressWarnings("nls")
private Operator genLimitPlan(String dest, Operator input, int offset, int limit) {
    // A map-only job can be optimized: instead of converting it to a
    // map-reduce job, we can have another map-only job do the same work,
    // avoiding the cost of sorting in the map-reduce phase. A better
    // approach would be to write into a local file and then have a
    // map-only job.
    // Add the limit operator to get the value fields.
    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
    LimitDesc limitDesc = new LimitDesc(offset, limit);
    globalLimitCtx.setLastReduceLimitDesc(limitDesc);
    Operator limitMap = putOpInsertMap(OperatorFactory.getAndMakeChild(limitDesc, new RowSchema(inputRR.getColumnInfos()), input), inputRR);
    LOG.debug("Created LimitOperator Plan for clause: {} row schema: {}", dest, inputRR);
    return limitMap;
}
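The (offset, limit) pair captured in LimitDesc above is what the LimitOperator enforces at run time: skip the first offset rows, then forward at most limit rows to the child operator. Below is a minimal standalone sketch of that row-forwarding logic, not Hive's actual LimitOperator; the class SimpleLimit and its process/result methods are hypothetical names used only for illustration.

import java.util.ArrayList;
import java.util.List;

// Minimal sketch of limit-with-offset semantics, assuming rows arrive
// one at a time as they would from a parent operator.
class SimpleLimit<T> {
    private final int offset;   // rows to skip, as in LimitDesc(offset, limit)
    private final int limit;    // maximum number of rows to forward
    private int seen = 0;       // rows consumed so far
    private int emitted = 0;    // rows forwarded so far
    private final List<T> forwarded = new ArrayList<>();

    SimpleLimit(int offset, int limit) {
        this.offset = offset;
        this.limit = limit;
    }

    // Returns true while more input is still useful; a real operator
    // could use this to signal early termination to its parent.
    boolean process(T row) {
        if (seen++ >= offset && emitted < limit) {
            forwarded.add(row); // stand-in for forwarding to child operators
            emitted++;
        }
        return emitted < limit;
    }

    List<T> result() {
        return forwarded;
    }
}

For example, feeding rows 1 through 10 into new SimpleLimit<Integer>(2, 3) forwards 3, 4, and 5: two rows are skipped for the offset, then three are emitted for the limit.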

Example 7 with LimitOperator

Use of org.apache.hadoop.hive.ql.exec.LimitOperator in project hive by apache.

Source: the genPlan method of the class SemanticAnalyzer.

@SuppressWarnings("nls")
private Operator genPlan(QB qb, boolean skipAmbiguityCheck) throws SemanticException {
    // First generate all the opInfos for the elements in the from clause
    // Must be deterministic order map - see HIVE-8707
    Map<String, Operator> aliasToOpInfo = new LinkedHashMap<String, Operator>();
    // Recurse over the subqueries to fill the subquery part of the plan
    for (String alias : qb.getSubqAliases()) {
        QBExpr qbexpr = qb.getSubqForAlias(alias);
        Operator<?> operator = genPlan(qb, qbexpr);
        aliasToOpInfo.put(alias, operator);
        if (qb.getViewToTabSchema().containsKey(alias)) {
            // we set viewProjectToTableSchema so that we can leverage ColumnPruner.
            if (operator instanceof LimitOperator) {
                // A view created with a LIMIT clause ends in a LimitOperator;
                // step up to its parent operator to reach the projection.
                operator = operator.getParentOperators().get(0);
            }
            if (operator instanceof SelectOperator) {
                if (this.viewProjectToTableSchema == null) {
                    this.viewProjectToTableSchema = new LinkedHashMap<>();
                }
                viewProjectToTableSchema.put((SelectOperator) operator, qb.getViewToTabSchema().get(alias));
            } else {
                throw new SemanticException("View " + alias + " is corresponding to " + operator.getType().name() + ", rather than a SelectOperator.");
            }
        }
    }
    // Recurse over all the source tables
    for (String alias : qb.getTabAliases()) {
        if (alias.equals(DUMMY_TABLE)) {
            continue;
        }
        Operator op = genTablePlan(alias, qb);
        aliasToOpInfo.put(alias, op);
    }
    if (aliasToOpInfo.isEmpty()) {
        qb.getMetaData().setSrcForAlias(DUMMY_TABLE, getDummyTable());
        TableScanOperator op = (TableScanOperator) genTablePlan(DUMMY_TABLE, qb);
        op.getConf().setRowLimit(1);
        qb.addAlias(DUMMY_TABLE);
        qb.setTabAlias(DUMMY_TABLE, DUMMY_TABLE);
        aliasToOpInfo.put(DUMMY_TABLE, op);
    }
    Operator srcOpInfo = null;
    Operator lastPTFOp = null;
    if (queryProperties.hasPTF()) {
        // After processing subqueries and source tables, process
        // partitioned table functions
        Map<ASTNode, PTFInvocationSpec> ptfNodeToSpec = qb.getPTFNodeToSpec();
        if (ptfNodeToSpec != null) {
            for (Entry<ASTNode, PTFInvocationSpec> entry : ptfNodeToSpec.entrySet()) {
                ASTNode ast = entry.getKey();
                PTFInvocationSpec spec = entry.getValue();
                String inputAlias = spec.getQueryInputName();
                Operator inOp = aliasToOpInfo.get(inputAlias);
                if (inOp == null) {
                    throw new SemanticException(generateErrorMessage(ast, "Cannot resolve input Operator for PTF invocation"));
                }
                lastPTFOp = genPTFPlan(spec, inOp);
                String ptfAlias = spec.getFunction().getAlias();
                if (ptfAlias != null) {
                    aliasToOpInfo.put(ptfAlias, lastPTFOp);
                }
            }
        }
    }
    // For all the source tables that have a lateral view, attach the
    // appropriate operators to the TS
    genLateralViewPlans(aliasToOpInfo, qb);
    // process join
    if (qb.getParseInfo().getJoinExpr() != null) {
        ASTNode joinExpr = qb.getParseInfo().getJoinExpr();
        if (joinExpr.getToken().getType() == HiveParser.TOK_UNIQUEJOIN) {
            QBJoinTree joinTree = genUniqueJoinTree(qb, joinExpr, aliasToOpInfo);
            qb.setQbJoinTree(joinTree);
        } else {
            QBJoinTree joinTree = genJoinTree(qb, joinExpr, aliasToOpInfo);
            qb.setQbJoinTree(joinTree);
            /*
             * If there is only one destination in the query, try to push
             * where-clause predicates down as join conditions.
             */
            Set<String> dests = qb.getParseInfo().getClauseNames();
            if (dests.size() == 1 && joinTree.getNoOuterJoin()) {
                String dest = dests.iterator().next();
                ASTNode whereClause = qb.getParseInfo().getWhrForClause(dest);
                if (whereClause != null) {
                    extractJoinCondsFromWhereClause(joinTree, (ASTNode) whereClause.getChild(0), aliasToOpInfo);
                }
            }
            if (!disableJoinMerge) {
                mergeJoinTree(qb);
            }
        }
        // if any filters are present in the join tree, push them on top of the
        // table
        pushJoinFilters(qb, qb.getQbJoinTree(), aliasToOpInfo);
        srcOpInfo = genJoinPlan(qb, aliasToOpInfo);
    } else {
        // If there were more than one source we would have a join case;
        // later this can be extended to the union all case as well.
        srcOpInfo = aliasToOpInfo.values().iterator().next();
        // With PTFs there may be more operators (note for PTF chains:
        // one PTF invocation may entail multiple PTF operators).
        srcOpInfo = lastPTFOp != null ? lastPTFOp : srcOpInfo;
    }
    Operator bodyOpInfo = genBodyPlan(qb, srcOpInfo, aliasToOpInfo);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Created Plan for Query Block " + qb.getId());
    }
    if (qb.getAlias() != null) {
        rewriteRRForSubQ(qb.getAlias(), bodyOpInfo, skipAmbiguityCheck);
    }
    setQB(qb);
    return bodyOpInfo;
}
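The comment "Must be deterministic order map - see HIVE-8707" explains why aliasToOpInfo is a LinkedHashMap: the rest of genPlan iterates over this map, so its iteration order must be reproducible across runs or the generated plans could differ. The following self-contained demonstration of that difference uses the hypothetical class name OrderDemo; it illustrates the Java collection behavior and is not Hive code.

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

// LinkedHashMap iterates in insertion order, while HashMap makes no
// ordering promise, so code that walks the map stays deterministic
// only with the former.
public class OrderDemo {
    public static void main(String[] args) {
        Map<String, Integer> hashed = new HashMap<>();
        Map<String, Integer> linked = new LinkedHashMap<>();
        for (String alias : new String[] { "t3", "t1", "t2" }) {
            hashed.put(alias, 0);
            linked.put(alias, 0);
        }
        System.out.println(hashed.keySet()); // order is unspecified
        System.out.println(linked.keySet()); // always [t3, t1, t2]
    }
}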

Aggregations

LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator): 7 uses
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 6 uses
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 6 uses
Operator (org.apache.hadoop.hive.ql.exec.Operator): 5 uses
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 5 uses
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 4 uses
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 4 uses
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 4 uses
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 4 uses
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 3 uses
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 3 uses
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 3 uses
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 3 uses
LimitDesc (org.apache.hadoop.hive.ql.plan.LimitDesc): 3 uses
ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc): 2 uses
ImmutableSet (com.google.common.collect.ImmutableSet): 1 use
ArrayList (java.util.ArrayList): 1 use
HashMap (java.util.HashMap): 1 use
LinkedHashMap (java.util.LinkedHashMap): 1 use
List (java.util.List): 1 use