Search in sources :

Example 1 with TopNKeyOperator

use of org.apache.hadoop.hive.ql.exec.TopNKeyOperator in project hive by apache.

the class TopNKeyProcessor method process.

/**
 * Visits a ReduceSink operator and, when it carries a usable top-n limit, builds a
 * {@link TopNKeyDesc} from its ordering keys and hands it to {@code copyDown}.
 *
 * <p>The node is left untouched when any of the following holds:
 * <ul>
 *   <li>the top n is negative, the reduce sink is not ordering, or the top n exceeds
 *       {@code maxTopNAllowed};</li>
 *   <li>the first parent of the reduce sink is already a {@link TopNKeyOperator};</li>
 *   <li>the reduce sink is a PTF reduce sink whose partition columns are not strictly
 *       fewer than its key columns.</li>
 * </ul>
 * Otherwise, after {@code copyDown} returns, the top n of the reduce sink is reset to -1.
 *
 * @param nd the node being visited; cast to {@link ReduceSinkOperator}
 * @param stack not read by this method
 * @param procCtx not read by this method
 * @param nodeOutputs not read by this method
 * @return always {@code null}
 * @throws SemanticException declared by the processor contract
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    final ReduceSinkOperator rsOp = (ReduceSinkOperator) nd;
    final ReduceSinkDesc rsDesc = rsOp.getConf();

    // Only an ordering reduce sink with 0 <= topN <= maxTopNAllowed qualifies.
    final boolean hasUsableTopN = rsDesc.getTopN() >= 0
            && rsDesc.isOrdering()
            && rsDesc.getTopN() <= maxTopNAllowed;
    if (!hasUsableTopN) {
        return null;
    }

    // Nothing to do when a top n key operator already sits directly above the reduce sink.
    if (rsOp.getParentOperators().get(0) instanceof TopNKeyOperator) {
        return null;
    }

    final List<ExprNodeDesc> partitionCols;
    if (rsDesc.isPTFReduceSink()) {
        // Partition cols are a prefix of the key cols; bail out unless at least one
        // key column remains beyond the partition columns.
        if (rsDesc.getPartitionCols().size() >= rsDesc.getKeyCols().size()) {
            return null;
        }
        partitionCols = rsDesc.getPartitionCols();
    } else {
        partitionCols = Collections.emptyList();
    }

    final TopNKeyDesc newDesc = new TopNKeyDesc(
            rsDesc.getTopN(),
            rsDesc.getOrder(),
            rsDesc.getNullOrder(),
            rsDesc.getKeyCols(),
            partitionCols,
            efficiencyThreshold,
            checkEfficiencyNumBatches,
            maxNumberOfPartitions);
    // copyDown is defined elsewhere; presumably it places a TopNKey operator built from
    // newDesc above the reduce sink -- TODO confirm against its definition.
    copyDown(rsOp, newDesc);

    // Clear the limit on the reduce sink itself now that the descriptor has been handed off.
    rsDesc.setTopN(-1);
    return null;
}
Also used : TopNKeyDesc(org.apache.hadoop.hive.ql.plan.TopNKeyDesc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TopNKeyOperator(org.apache.hadoop.hive.ql.exec.TopNKeyOperator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)

Example 2 with TopNKeyOperator

use of org.apache.hadoop.hive.ql.exec.TopNKeyOperator in project hive by apache.

the class TopNKeyPushdownProcessor method pushdownThroughTopNKey.

/**
 * Push through another Top N Key operator.
 * <ul>
 *   <li>If both TNK operators have the same descriptor (see {@link TopNKeyDesc#isSame}),
 *       <code>topNKey</code> is removed and nothing else is done.</li>
 *   <li>Otherwise, if the topN property is the same:
 *     <ul>
 *       <li>when the keys of <code>topNKey</code> form a common prefix of its parent's keys,
 *           <code>topNKey</code> is pushed through the parent;</li>
 *       <li>else when the parent's keys form a common prefix of the keys of
 *           <code>topNKey</code>, the parent is removed.</li>
 *     </ul>
 *   </li>
 *   <li>Otherwise, if the topN property differs but the keys are the same, the operator with
 *       the higher topN value is removed.</li>
 * </ul>
 * In every other case the plan is left unchanged.
 *
 * @param topNKey TopNKey operator to push; its first parent must be a {@link TopNKeyOperator}
 * @throws SemanticException when removeChildAndAdoptItsChildren was not successful
 */
private void pushdownThroughTopNKey(TopNKeyOperator topNKey) throws SemanticException {
    // The cast assumes the caller has already verified that the first parent is a TNK operator.
    TopNKeyOperator parent = (TopNKeyOperator) topNKey.getParentOperators().get(0);
    if (hasSameTopNKeyDesc(parent, topNKey.getConf())) {
        // Identical descriptors: the lower operator is redundant, drop it.
        LOG.debug("Removing {} above same operator: {}", topNKey.getName(), parent.getName());
        parent.removeChildAndAdoptItsChildren(topNKey);
        return;
    }
    TopNKeyDesc topNKeyDesc = topNKey.getConf();
    TopNKeyDesc parentTopNKeyDesc = parent.getConf();
    // Common leading keys of the two operators, built from each side's key columns, sort order and null order.
    CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(topNKeyDesc.getKeyColumns(), topNKeyDesc.getColumnSortOrder(), topNKeyDesc.getNullOrder(), parentTopNKeyDesc.getKeyColumns(), parentTopNKeyDesc.getColumnSortOrder(), parentTopNKeyDesc.getNullOrder());
    if (topNKeyDesc.getTopN() == parentTopNKeyDesc.getTopN()) {
        if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
            // TNK keys are subset of the parent TNK keys
            pushdownThroughParent(topNKey);
            // From here on the parents/children of topNKey are re-read, since
            // pushdownThroughParent presumably rearranged the operator tree -- TODO confirm.
            if (topNKey.getChildOperators().get(0).getType() == OperatorType.TOPNKEY) {
                // NOTE(review): the message names `parent` as the operator being removed, but the
                // call below removes `topNKey` from its current first parent -- confirm which is intended.
                LOG.debug("Removing {} since child {} supersedes it", parent.getName(), topNKey.getName());
                topNKey.getParentOperators().get(0).removeChildAndAdoptItsChildren(topNKey);
            }
        } else if (parentTopNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
            // parent TNK keys are subset of TNK keys
            LOG.debug("Removing parent of {} since it supersedes", topNKey.getName());
            parent.getParentOperators().get(0).removeChildAndAdoptItsChildren(parent);
        }
    } else if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size() && parentTopNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
        // Both key lists are fully covered by the common prefix, i.e. the keys are the same;
        // only topN differs, so keep the operator with the lower topN.
        if (topNKeyDesc.getTopN() > parentTopNKeyDesc.getTopN()) {
            LOG.debug("Removing {}. Parent {} has same keys but lower topN {} > {}", topNKey.getName(), parent.getName(), topNKeyDesc.getTopN(), parentTopNKeyDesc.getTopN());
            topNKey.getParentOperators().get(0).removeChildAndAdoptItsChildren(topNKey);
        } else {
            // topN values are known to differ here, so topNKey has the strictly lower topN.
            LOG.debug("Removing parent {}. {} has same keys but lower topN {} < {}", parent.getName(), topNKey.getName(), topNKeyDesc.getTopN(), parentTopNKeyDesc.getTopN());
            parent.getParentOperators().get(0).removeChildAndAdoptItsChildren(parent);
        }
    }
}
Also used : TopNKeyDesc(org.apache.hadoop.hive.ql.plan.TopNKeyDesc) TopNKeyOperator(org.apache.hadoop.hive.ql.exec.TopNKeyOperator)

Example 3 with TopNKeyOperator

use of org.apache.hadoop.hive.ql.exec.TopNKeyOperator in project hive by apache.

the class TopNKeyPushdownProcessor method hasSameTopNKeyDesc.

/**
 * Tells whether the given operator is a {@link TopNKeyOperator} whose descriptor is the
 * same as {@code desc} according to {@link TopNKeyDesc#isSame}.
 *
 * @param operator operator to inspect; any operator type is accepted
 * @param desc descriptor to compare against
 * @return {@code false} when {@code operator} is not a {@link TopNKeyOperator}; otherwise
 *         the result of {@code isSame} invoked on the operator's descriptor with {@code desc}
 */
private static boolean hasSameTopNKeyDesc(Operator<? extends OperatorDesc> operator, TopNKeyDesc desc) {
    // Short-circuit keeps the cast safe: it is only reached for TopNKeyOperator instances.
    return operator instanceof TopNKeyOperator
            && ((TopNKeyOperator) operator).getConf().isSame(desc);
}
Also used : TopNKeyDesc(org.apache.hadoop.hive.ql.plan.TopNKeyDesc) TopNKeyOperator(org.apache.hadoop.hive.ql.exec.TopNKeyOperator)

Example 4 with TopNKeyOperator

use of org.apache.hadoop.hive.ql.exec.TopNKeyOperator in project hive by apache.

the class TopNKeyPushdownProcessor method pushdownThroughLeftOuterJoin.

/**
 * Pushes a TopNKey operator through a left outer join towards the join's left input.
 *
 * <p>The TopNKey key expressions are matched against the key columns of the ReduceSink that
 * is the first parent of the join. When a usable common key prefix exists, a copy of the
 * operator restricted to that prefix is placed via {@code copyDown} on that ReduceSink and
 * pushed further. If the prefix covers all key columns of the original operator, the
 * original is removed from above the join; otherwise it stays in place.
 *
 * <p>Nothing happens when the common prefix is empty or when its size equals the number of
 * partition key columns of the TopNKey descriptor.
 *
 * @param topNKey TopNKey operator to push; its first parent must be a join operator whose
 *                first parent is a {@link ReduceSinkOperator}
 * @throws SemanticException when removeChildAndAdoptItsChildren was not successful
 */
private void pushdownThroughLeftOuterJoin(TopNKeyOperator topNKey) throws SemanticException {
    final TopNKeyDesc tnkDesc = topNKey.getConf();
    final CommonJoinOperator<? extends JoinDesc> joinOp =
            (CommonJoinOperator<? extends JoinDesc>) topNKey.getParentOperators().get(0);
    // The left input of the join is its first parent.
    final ReduceSinkOperator leftInput = (ReduceSinkOperator) joinOp.getParentOperators().get(0);
    final ReduceSinkDesc leftDesc = leftInput.getConf();

    // TopNKey keys are first mapped through the join's column expression map, then
    // compared with the keys of the left reduce sink.
    final CommonKeyPrefix prefix = CommonKeyPrefix.map(
            mapUntilColumnEquals(tnkDesc.getKeyColumns(), joinOp.getColumnExprMap()),
            tnkDesc.getColumnSortOrder(),
            tnkDesc.getNullOrder(),
            leftDesc.getKeyCols(),
            leftDesc.getColumnExprMap(),
            leftDesc.getOrder(),
            leftDesc.getNullOrder());

    final boolean nothingToPush =
            prefix.isEmpty() || prefix.size() == tnkDesc.getPartitionKeyColumns().size();
    if (nothingToPush) {
        return;
    }

    LOG.debug("Pushing a copy of {} through {} and {}", topNKey.getName(), joinOp.getName(), leftInput.getName());
    final TopNKeyDesc pushedDesc = tnkDesc.combine(prefix);
    pushdown((TopNKeyOperator) copyDown(leftInput, pushedDesc));

    // Every key of the original operator is covered by the pushed copy, so the original
    // above the join is no longer needed.
    final boolean fullyCovered = tnkDesc.getKeyColumns().size() == prefix.size();
    if (fullyCovered) {
        LOG.debug("Removing {} above {}", topNKey.getName(), joinOp.getName());
        joinOp.removeChildAndAdoptItsChildren(topNKey);
    }
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) TopNKeyOperator(org.apache.hadoop.hive.ql.exec.TopNKeyOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) TopNKeyDesc(org.apache.hadoop.hive.ql.plan.TopNKeyDesc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 5 with TopNKeyOperator

use of org.apache.hadoop.hive.ql.exec.TopNKeyOperator in project hive by apache.

the class TopNKeyPushdownProcessor method pushdownInnerJoin.

/**
 * Tries to push the TopNKey operator through an inner join towards the FK side input.
 *
 * <p>Requirements for the push:
 * <ul>
 *   <li>the join is a PK-FK join (not verified in this method; presumably established by
 *       the caller when it determines {@code fkJoinInputIndex});</li>
 *   <li>the non FK side is not filtered;</li>
 *   <li>the first n TopNKey key columns (Order By) originate from the FK side, i.e. they
 *       share a common key prefix with the FK side ReduceSink.</li>
 * </ul>
 * When the common prefix covers all key columns of the original operator, the original is
 * removed from above the join after the copy has been pushed.
 *
 * @param topNKey TopNKey operator to push; its first parent must be a join operator
 * @param fkJoinInputIndex index, among the parents of the join, of the FK side input; that
 *                         parent must be a {@link ReduceSinkOperator}
 * @param nonFkSideIsFiltered when {@code true} nothing is pushed
 * @throws SemanticException when removeChildAndAdoptItsChildren was not successful
 */
private void pushdownInnerJoin(TopNKeyOperator topNKey, int fkJoinInputIndex, boolean nonFkSideIsFiltered) throws SemanticException {
    final TopNKeyDesc tnkDesc = topNKey.getConf();
    final CommonJoinOperator<? extends JoinDesc> joinOp =
            (CommonJoinOperator<? extends JoinDesc>) topNKey.getParentOperators().get(0);
    final ReduceSinkOperator fkSide =
            (ReduceSinkOperator) joinOp.getParentOperators().get(fkJoinInputIndex);

    if (nonFkSideIsFiltered) {
        LOG.debug("Not pushing {} through {} as non FK side of the join is filtered", topNKey.getName(), joinOp.getName());
        return;
    }

    // TopNKey keys are first mapped through the join's column expression map, then
    // compared with the keys of the FK side reduce sink.
    final CommonKeyPrefix prefix = CommonKeyPrefix.map(
            mapUntilColumnEquals(tnkDesc.getKeyColumns(), joinOp.getColumnExprMap()),
            tnkDesc.getColumnSortOrder(),
            tnkDesc.getNullOrder(),
            fkSide.getConf().getKeyCols(),
            fkSide.getConf().getColumnExprMap(),
            fkSide.getConf().getOrder(),
            fkSide.getConf().getNullOrder());

    final boolean nothingToPush =
            prefix.isEmpty() || prefix.size() == tnkDesc.getPartitionKeyColumns().size();
    if (nothingToPush) {
        return;
    }

    LOG.debug("Pushing a copy of {} through {} and {}", topNKey.getName(), joinOp.getName(), fkSide.getName());
    final TopNKeyDesc pushedDesc = tnkDesc.combine(prefix);
    pushdown((TopNKeyOperator) copyDown(fkSide, pushedDesc));

    // Every key of the original operator is covered by the pushed copy, so the original
    // above the join is no longer needed.
    final boolean fullyCovered = tnkDesc.getKeyColumns().size() == prefix.size();
    if (fullyCovered) {
        LOG.debug("Removing {} above {}", topNKey.getName(), joinOp.getName());
        joinOp.removeChildAndAdoptItsChildren(topNKey);
    }
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) TopNKeyOperator(org.apache.hadoop.hive.ql.exec.TopNKeyOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) TopNKeyDesc(org.apache.hadoop.hive.ql.plan.TopNKeyDesc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Aggregations

TopNKeyOperator (org.apache.hadoop.hive.ql.exec.TopNKeyOperator)5 TopNKeyDesc (org.apache.hadoop.hive.ql.plan.TopNKeyDesc)5 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)3 CommonJoinOperator (org.apache.hadoop.hive.ql.exec.CommonJoinOperator)2 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)2 Operator (org.apache.hadoop.hive.ql.exec.Operator)2 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)2 JoinDesc (org.apache.hadoop.hive.ql.plan.JoinDesc)2 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)2 ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)2 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)1