Search in sources :

Example 1 with TopNKeyDesc

use of org.apache.hadoop.hive.ql.plan.TopNKeyDesc in project hive by apache.

the class TopNKeyProcessor method process.

/**
 * Examines a ReduceSink operator and, when it carries a usable top-n limit, builds a
 * {@link TopNKeyDesc} from its configuration and passes it to {@code copyDown}; the limit on the
 * reduce sink is then reset to -1.
 *
 * <p>Nothing is done when the reduce sink has a negative top n, is not ordering, exceeds
 * {@code maxTopNAllowed}, already has a {@link TopNKeyOperator} as its first parent, or is a PTF
 * reduce sink whose partition columns are not a strict prefix of its key columns.
 *
 * @param nd the node being visited; cast to {@link ReduceSinkOperator}
 * @param stack not read by this method
 * @param procCtx not read by this method
 * @param nodeOutputs not read by this method
 * @return always {@code null}
 * @throws SemanticException declared by the signature; presumably propagated from
 *     {@code copyDown} - TODO confirm
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    final ReduceSinkOperator rsOp = (ReduceSinkOperator) nd;
    final ReduceSinkDesc rsDesc = rsOp.getConf();

    // Bail out unless the reduce sink has an ordered top n within the allowed maximum.
    final int topN = rsDesc.getTopN();
    if (topN < 0 || !rsDesc.isOrdering() || topN > maxTopNAllowed) {
        return null;
    }

    // A top n key operator directly above the reduce sink means there is nothing to add.
    if (rsOp.getParentOperators().get(0) instanceof TopNKeyOperator) {
        return null;
    }

    final List<ExprNodeDesc> partitionCols;
    if (rsDesc.isPTFReduceSink()) {
        // Partition cols are a prefix of the key cols; at least one extra key col must remain.
        partitionCols = rsDesc.getPartitionCols();
        if (partitionCols.size() >= rsDesc.getKeyCols().size()) {
            return null;
        }
    } else {
        partitionCols = Collections.emptyList();
    }

    final TopNKeyDesc topNKeyDesc = new TopNKeyDesc(
        topN,
        rsDesc.getOrder(),
        rsDesc.getNullOrder(),
        rsDesc.getKeyCols(),
        partitionCols,
        efficiencyThreshold,
        checkEfficiencyNumBatches,
        maxNumberOfPartitions);
    copyDown(rsOp, topNKeyDesc);

    // The limit now lives in the new descriptor, so clear it on the reduce sink.
    rsDesc.setTopN(-1);
    return null;
}
Also used : TopNKeyDesc(org.apache.hadoop.hive.ql.plan.TopNKeyDesc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TopNKeyOperator(org.apache.hadoop.hive.ql.exec.TopNKeyOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)

Example 2 with TopNKeyDesc

use of org.apache.hadoop.hive.ql.plan.TopNKeyDesc in project hive by apache.

the class TopNKeyPushdownProcessor method pushdownThroughGroupBy.

/**
 * Pushes a TopNKey operator through its parent GroupBy (no grouping sets).
 *
 * <p>The key prefix shared by the TopNKey and the GroupBy decides what happens:
 * <ul>
 *   <li>no shared prefix, or a shared prefix no longer than the TopNKey partition key columns:
 *       nothing is changed;</li>
 *   <li>otherwise a copy of the TopNKey restricted to the shared prefix is placed via
 *       {@code copyDown} on the GroupBy and pushed further;</li>
 *   <li>if the shared prefix covers every TopNKey key column, the original TopNKey above the
 *       GroupBy is removed as well.</li>
 * </ul>
 *
 * @param topNKey TopNKey operator to push
 * @throws SemanticException when removeChildAndAdoptItsChildren was not successful
 */
private void pushdownThroughGroupBy(TopNKeyOperator topNKey) throws SemanticException {
    final GroupByOperator groupBy = (GroupByOperator) topNKey.getParentOperators().get(0);
    final GroupByDesc groupByConf = groupBy.getConf();
    final TopNKeyDesc currentDesc = topNKey.getConf();

    final CommonKeyPrefix sharedPrefix = CommonKeyPrefix.map(currentDesc, groupByConf);
    if (sharedPrefix.isEmpty()) {
        return;
    }
    if (sharedPrefix.size() == currentDesc.getPartitionKeyColumns().size()) {
        return;
    }

    LOG.debug("Pushing a copy of {} through {}", topNKey.getName(), groupBy.getName());
    final TopNKeyDesc pushedDesc = currentDesc.combine(sharedPrefix);
    final TopNKeyOperator pushedCopy = (TopNKeyOperator) copyDown(groupBy, pushedDesc);
    pushdown(pushedCopy);

    // Keep the original operator unless the shared prefix covers all of its key columns.
    if (currentDesc.getKeyColumns().size() != sharedPrefix.size()) {
        return;
    }
    LOG.debug("Removing {} above {}", topNKey.getName(), groupBy.getName());
    groupBy.removeChildAndAdoptItsChildren(topNKey);
}
Also used : TopNKeyDesc(org.apache.hadoop.hive.ql.plan.TopNKeyDesc) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc)

Example 3 with TopNKeyDesc

use of org.apache.hadoop.hive.ql.plan.TopNKeyDesc in project hive by apache.

the class Vectorizer method vectorizeTopNKeyOperator.

/**
 * Builds the vectorized form of a TopNKey operator.
 *
 * <p>Vector expressions are derived for the key columns and the partition key columns of the
 * operator's {@link TopNKeyDesc}, stored on {@code vectorTopNKeyDesc}, and the vector operator is
 * then obtained from {@code OperatorFactory}.
 *
 * @param topNKeyOperator operator whose configuration is cast to {@link TopNKeyDesc}
 * @param vContext vectorization context used to build the expressions and the operator
 * @param vectorTopNKeyDesc vector descriptor that receives the generated expressions
 * @return the operator returned by {@code OperatorFactory.getVectorOperator}
 * @throws HiveException declared by the signature; presumably raised while building the vector
 *     expressions or the operator - TODO confirm
 */
private static Operator<? extends OperatorDesc> vectorizeTopNKeyOperator(Operator<? extends OperatorDesc> topNKeyOperator, VectorizationContext vContext, VectorTopNKeyDesc vectorTopNKeyDesc) throws HiveException {
    final TopNKeyDesc desc = (TopNKeyDesc) topNKeyOperator.getConf();

    // Build both expression arrays before touching the vector descriptor.
    final VectorExpression[] vectorizedKeys =
        getVectorExpressions(vContext, desc.getKeyColumns());
    final VectorExpression[] vectorizedPartitionKeys =
        getVectorExpressions(vContext, desc.getPartitionKeyColumns());

    vectorTopNKeyDesc.setKeyExpressions(vectorizedKeys);
    vectorTopNKeyDesc.setPartitionKeyColumns(vectorizedPartitionKeys);

    return OperatorFactory.getVectorOperator(
        topNKeyOperator.getCompilationOpContext(), desc, vContext, vectorTopNKeyDesc);
}
Also used : VectorTopNKeyDesc(org.apache.hadoop.hive.ql.plan.VectorTopNKeyDesc) TopNKeyDesc(org.apache.hadoop.hive.ql.plan.TopNKeyDesc) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)

Example 4 with TopNKeyDesc

use of org.apache.hadoop.hive.ql.plan.TopNKeyDesc in project hive by apache.

the class TopNKeyPushdownProcessor method pushdownThroughTopNKey.

/**
 * Push through another Top N Key operator.
 * If the TNK operators are the same, <code>topNKey</code> is removed. See {@link TopNKeyDesc#isSame}.
 * Otherwise, if the expressions in <code>topNKey</code> are a common prefix of its parent TNK op's
 * expressions and the topN property is the same, then <code>topNKey</code> is pushed through the parent.
 * If the Top N Key operator can not be pushed through, this method tries to remove one of them:
 * - if the topN property is the same and the keys of the parent are a subset of the keys of
 *   <code>topNKey</code>, the parent is removed
 * - if the keys are the same, the operator with the higher topN value is removed
 * In every other case the plan is left unchanged.
 *
 * @param topNKey TopNKey operator to push; its first parent is cast to {@link TopNKeyOperator}
 * @throws SemanticException when removeChildAndAdoptItsChildren was not successful
 */
private void pushdownThroughTopNKey(TopNKeyOperator topNKey) throws SemanticException {
    TopNKeyOperator parent = (TopNKeyOperator) topNKey.getParentOperators().get(0);
    // Identical descriptors: the operator above its parent adds nothing, so drop it.
    if (hasSameTopNKeyDesc(parent, topNKey.getConf())) {
        LOG.debug("Removing {} above same operator: {}", topNKey.getName(), parent.getName());
        parent.removeChildAndAdoptItsChildren(topNKey);
        return;
    }
    TopNKeyDesc topNKeyDesc = topNKey.getConf();
    TopNKeyDesc parentTopNKeyDesc = parent.getConf();
    // Common prefix is computed over key columns, sort order and null order of both operators.
    CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(topNKeyDesc.getKeyColumns(), topNKeyDesc.getColumnSortOrder(), topNKeyDesc.getNullOrder(), parentTopNKeyDesc.getKeyColumns(), parentTopNKeyDesc.getColumnSortOrder(), parentTopNKeyDesc.getNullOrder());
    if (topNKeyDesc.getTopN() == parentTopNKeyDesc.getTopN()) {
        if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
            // All keys of this TNK are in the common prefix, i.e. they are a prefix of the parent TNK keys
            pushdownThroughParent(topNKey);
            // This check runs after the pushdown, so it inspects the first child topNKey has at that point.
            // NOTE(review): the debug message lists parent as the operator being removed, but the
            // call below detaches topNKey from its current first parent - confirm which is intended.
            if (topNKey.getChildOperators().get(0).getType() == OperatorType.TOPNKEY) {
                LOG.debug("Removing {} since child {} supersedes it", parent.getName(), topNKey.getName());
                topNKey.getParentOperators().get(0).removeChildAndAdoptItsChildren(topNKey);
            }
        } else if (parentTopNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
            // All parent TNK keys are in the common prefix, i.e. they are a prefix of this TNK's keys;
            // the parent is detached from its own first parent.
            LOG.debug("Removing parent of {} since it supersedes", topNKey.getName());
            parent.getParentOperators().get(0).removeChildAndAdoptItsChildren(parent);
        }
    } else if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size() && parentTopNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
        // Different topN but both key lists equal the common prefix: keep only the operator with the lower topN.
        if (topNKeyDesc.getTopN() > parentTopNKeyDesc.getTopN()) {
            LOG.debug("Removing {}. Parent {} has same keys but lower topN {} > {}", topNKey.getName(), parent.getName(), topNKeyDesc.getTopN(), parentTopNKeyDesc.getTopN());
            topNKey.getParentOperators().get(0).removeChildAndAdoptItsChildren(topNKey);
        } else {
            LOG.debug("Removing parent {}. {} has same keys but lower topN {} < {}", parent.getName(), topNKey.getName(), topNKeyDesc.getTopN(), parentTopNKeyDesc.getTopN());
            parent.getParentOperators().get(0).removeChildAndAdoptItsChildren(parent);
        }
    }
}
Also used : TopNKeyDesc(org.apache.hadoop.hive.ql.plan.TopNKeyDesc) TopNKeyOperator(org.apache.hadoop.hive.ql.exec.TopNKeyOperator)

Example 5 with TopNKeyDesc

use of org.apache.hadoop.hive.ql.plan.TopNKeyDesc in project hive by apache.

the class TopNKeyPushdownProcessor method hasSameTopNKeyDesc.

/**
 * Tells whether the given operator is a {@link TopNKeyOperator} whose descriptor reports itself
 * as the same as {@code desc} through {@link TopNKeyDesc#isSame}.
 *
 * @param operator operator to inspect; any operator type is accepted
 * @param desc descriptor to compare against
 * @return {@code false} when {@code operator} is not a {@link TopNKeyOperator}, otherwise the
 *     result of {@code isSame}
 */
private static boolean hasSameTopNKeyDesc(Operator<? extends OperatorDesc> operator, TopNKeyDesc desc) {
    if (operator instanceof TopNKeyOperator) {
        return ((TopNKeyOperator) operator).getConf().isSame(desc);
    }
    return false;
}
Also used : TopNKeyDesc(org.apache.hadoop.hive.ql.plan.TopNKeyDesc) TopNKeyOperator(org.apache.hadoop.hive.ql.exec.TopNKeyOperator)

Aggregations

TopNKeyDesc (org.apache.hadoop.hive.ql.plan.TopNKeyDesc): 9, TopNKeyOperator (org.apache.hadoop.hive.ql.exec.TopNKeyOperator): 5, ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 4, GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 3, SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 3, ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc): 3, CommonJoinOperator (org.apache.hadoop.hive.ql.exec.CommonJoinOperator): 2, Operator (org.apache.hadoop.hive.ql.exec.Operator): 2, ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 2, JoinDesc (org.apache.hadoop.hive.ql.plan.JoinDesc): 2, OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 2, VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 1, GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc): 1, VectorTopNKeyDesc (org.apache.hadoop.hive.ql.plan.VectorTopNKeyDesc): 1