use of org.apache.hadoop.hive.ql.exec.TopNKeyOperator in project hive by apache.
the class TopNKeyProcessor method process.
/**
 * Visits a ReduceSink node and, when it carries an eligible top-n limit, hands a freshly built
 * {@link TopNKeyDesc} to {@code copyDown} for that ReduceSink.
 *
 * <p>The node is left untouched when any of the following holds:
 * <ul>
 *   <li>its top-n value is negative or the ReduceSink is not ordering,</li>
 *   <li>its top-n value exceeds {@code maxTopNAllowed},</li>
 *   <li>its first parent already is a {@link TopNKeyOperator},</li>
 *   <li>it is a PTF ReduceSink whose partition columns are not strictly fewer than its key
 *       columns.</li>
 * </ul>
 *
 * @param nd the visited node; cast to {@link ReduceSinkOperator} without a type check
 * @param stack not read by this method
 * @param procCtx not read by this method
 * @param nodeOutputs not read by this method
 * @return always {@code null}
 * @throws SemanticException declared by the overridden method; presumably raised by
 *         {@code copyDown} - confirm against its definition
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
    throws SemanticException {
  final ReduceSinkOperator rsOperator = (ReduceSinkOperator) nd;
  final ReduceSinkDesc rsDesc = rsOperator.getConf();

  // Nothing to do unless the reduce sink is an ordering one with a usable, non-excessive top n.
  if (rsDesc.getTopN() < 0 || !rsDesc.isOrdering() || rsDesc.getTopN() > maxTopNAllowed) {
    return null;
  }

  // A top n key operator directly in front of the reduce sink means the work is already done.
  if (rsOperator.getParentOperators().get(0) instanceof TopNKeyOperator) {
    return null;
  }

  final List<ExprNodeDesc> partitionCols;
  if (!rsDesc.isPTFReduceSink()) {
    partitionCols = Collections.emptyList();
  } else if (rsDesc.getPartitionCols().size() < rsDesc.getKeyCols().size()) {
    // Partition cols are a prefix of the key cols; at least one extra key col must remain.
    partitionCols = rsDesc.getPartitionCols();
  } else {
    return null;
  }

  final TopNKeyDesc newDesc = new TopNKeyDesc(
      rsDesc.getTopN(),
      rsDesc.getOrder(),
      rsDesc.getNullOrder(),
      rsDesc.getKeyCols(),
      partitionCols,
      efficiencyThreshold,
      checkEfficiencyNumBatches,
      maxNumberOfPartitions);
  copyDown(rsOperator, newDesc);

  // A negative top n is what the first guard above treats as "no top n on this reduce sink".
  rsDesc.setTopN(-1);
  return null;
}
use of org.apache.hadoop.hive.ql.exec.TopNKeyOperator in project hive by apache.
the class TopNKeyPushdownProcessor method pushdownThroughTopNKey.
/**
 * Handles a Top N Key (TNK) operator whose first parent is another TNK operator.
 *
 * <p>Decision table, evaluated in this order:
 * <ol>
 *   <li>Both descriptors are the same (see {@link TopNKeyDesc#isSame}): {@code topNKey} is
 *       removed.</li>
 *   <li>Same topN and every key of {@code topNKey} is part of the common key prefix:
 *       {@code topNKey} is passed to {@code pushdownThroughParent}; if its first child is a TNK
 *       operator afterwards, {@code topNKey} is detached from its current first parent.</li>
 *   <li>Same topN and every key of the parent is part of the common key prefix: the parent is
 *       removed.</li>
 *   <li>Different topN but both key lists are fully covered by the common prefix: the operator
 *       with the higher topN is removed.</li>
 *   <li>Anything else: no change.</li>
 * </ol>
 *
 * @param topNKey TopNKey operator to push; its first parent is cast to {@link TopNKeyOperator}
 * @throws SemanticException when removeChildAndAdoptItsChildren was not successful
 */
private void pushdownThroughTopNKey(TopNKeyOperator topNKey) throws SemanticException {
  final TopNKeyOperator parent = (TopNKeyOperator) topNKey.getParentOperators().get(0);
  final TopNKeyDesc childDesc = topNKey.getConf();

  if (hasSameTopNKeyDesc(parent, childDesc)) {
    LOG.debug("Removing {} above same operator: {}", topNKey.getName(), parent.getName());
    parent.removeChildAndAdoptItsChildren(topNKey);
    return;
  }

  final TopNKeyDesc parentDesc = parent.getConf();
  final CommonKeyPrefix sharedPrefix = CommonKeyPrefix.map(
      childDesc.getKeyColumns(), childDesc.getColumnSortOrder(), childDesc.getNullOrder(),
      parentDesc.getKeyColumns(), parentDesc.getColumnSortOrder(), parentDesc.getNullOrder());

  // Computed up front: these only read the descriptors, which are not modified below.
  final boolean childKeysCovered = childDesc.getKeyColumns().size() == sharedPrefix.size();
  final boolean parentKeysCovered = parentDesc.getKeyColumns().size() == sharedPrefix.size();

  if (childDesc.getTopN() != parentDesc.getTopN()) {
    if (!(childKeysCovered && parentKeysCovered)) {
      return;
    }
    // Same keys, different limits: drop whichever operator has the higher topN.
    if (childDesc.getTopN() > parentDesc.getTopN()) {
      LOG.debug("Removing {}. Parent {} has same keys but lower topN {} > {}",
          topNKey.getName(), parent.getName(), childDesc.getTopN(), parentDesc.getTopN());
      parent.removeChildAndAdoptItsChildren(topNKey);
    } else {
      LOG.debug("Removing parent {}. {} has same keys but lower topN {} < {}",
          parent.getName(), topNKey.getName(), childDesc.getTopN(), parentDesc.getTopN());
      parent.getParentOperators().get(0).removeChildAndAdoptItsChildren(parent);
    }
    return;
  }

  if (childKeysCovered) {
    // TNK keys are a subset of the parent TNK keys.
    pushdownThroughParent(topNKey);
    final boolean firstChildIsTopNKey =
        topNKey.getChildOperators().get(0).getType() == OperatorType.TOPNKEY;
    if (firstChildIsTopNKey) {
      // NOTE(review): the message names the former parent as the removed operator, but the call
      // below detaches topNKey from its current first parent - confirm which one is intended.
      LOG.debug("Removing {} since child {} supersedes it", parent.getName(), topNKey.getName());
      // The parent list is re-read here on purpose: pushdownThroughParent presumably re-wired it.
      topNKey.getParentOperators().get(0).removeChildAndAdoptItsChildren(topNKey);
    }
  } else if (parentKeysCovered) {
    // Parent TNK keys are a subset of the TNK keys.
    LOG.debug("Removing parent of {} since it supersedes", topNKey.getName());
    parent.getParentOperators().get(0).removeChildAndAdoptItsChildren(parent);
  }
}
use of org.apache.hadoop.hive.ql.exec.TopNKeyOperator in project hive by apache.
the class TopNKeyPushdownProcessor method hasSameTopNKeyDesc.
/**
 * Tells whether {@code operator} is a {@link TopNKeyOperator} whose descriptor reports
 * {@code isSame} for {@code desc}.
 *
 * @param operator operator to inspect; any non-TopNKey operator (or {@code null}) yields false
 * @param desc descriptor to compare against
 * @return {@code true} only for a TopNKey operator whose conf {@code isSame(desc)}
 */
private static boolean hasSameTopNKeyDesc(Operator<? extends OperatorDesc> operator, TopNKeyDesc desc) {
  return operator instanceof TopNKeyOperator
      && ((TopNKeyOperator) operator).getConf().isSame(desc);
}
use of org.apache.hadoop.hive.ql.exec.TopNKeyOperator in project hive by apache.
the class TopNKeyPushdownProcessor method pushdownThroughLeftOuterJoin.
/**
 * Push through a left outer join. The TopNKey key expressions are mapped through the join's
 * column expression map and matched against the keys of the join's first input, which must be
 * a ReduceSink. When a usable common key prefix exists, a TopNKey built from that prefix is
 * copied down at the ReduceSink and pushed further. The original TopNKey is removed from the
 * top of the join only when all of its key columns are covered by the prefix; otherwise it
 * stays in place.
 *
 * <p>Nothing happens when the common prefix is empty or is exactly as long as the list of
 * partition key columns of the TopNKey.
 *
 * @param topNKey TopNKey operator to push; its first parent is cast to a join operator
 * @throws SemanticException when removeChildAndAdoptItsChildren was not successful
 */
private void pushdownThroughLeftOuterJoin(TopNKeyOperator topNKey) throws SemanticException {
  final TopNKeyDesc tnkDesc = topNKey.getConf();
  final CommonJoinOperator<? extends JoinDesc> join =
      (CommonJoinOperator<? extends JoinDesc>) topNKey.getParentOperators().get(0);
  // Input 0 of the join is the left side.
  final ReduceSinkOperator leftInput = (ReduceSinkOperator) join.getParentOperators().get(0);
  final ReduceSinkDesc leftDesc = leftInput.getConf();

  final CommonKeyPrefix sharedPrefix = CommonKeyPrefix.map(
      mapUntilColumnEquals(tnkDesc.getKeyColumns(), join.getColumnExprMap()),
      tnkDesc.getColumnSortOrder(),
      tnkDesc.getNullOrder(),
      leftDesc.getKeyCols(),
      leftDesc.getColumnExprMap(),
      leftDesc.getOrder(),
      leftDesc.getNullOrder());

  if (sharedPrefix.isEmpty() || sharedPrefix.size() == tnkDesc.getPartitionKeyColumns().size()) {
    return;
  }

  LOG.debug("Pushing a copy of {} through {} and {}", topNKey.getName(), join.getName(), leftInput.getName());
  final TopNKeyDesc pushedDesc = tnkDesc.combine(sharedPrefix);
  pushdown((TopNKeyOperator) copyDown(leftInput, pushedDesc));

  // The operator above the join is redundant only if every one of its keys was pushed.
  final boolean allKeysPushed = tnkDesc.getKeyColumns().size() == sharedPrefix.size();
  if (allKeysPushed) {
    LOG.debug("Removing {} above {}", topNKey.getName(), join.getName());
    join.removeChildAndAdoptItsChildren(topNKey);
  }
}
use of org.apache.hadoop.hive.ql.exec.TopNKeyOperator in project hive by apache.
the class TopNKeyPushdownProcessor method pushdownInnerJoin.
/**
 * Tries to push the TopNKey operator through an inner join towards the foreign-key (FK) input.
 *
 * <p>What this method itself checks:
 * <ul>
 *   <li>the non-FK side must not be filtered ({@code nonFkSideIsFiltered} is false),</li>
 *   <li>the TopNKey key columns, mapped through the join, must share a non-empty key prefix with
 *       the FK-side ReduceSink, and that prefix must not be exactly as long as the TopNKey's
 *       partition key column list.</li>
 * </ul>
 * Which input is the FK side is decided by the caller via {@code fkJoinInputIndex}; presumably
 * the caller has established that this is a PK-FK join - verify at the call site.
 *
 * <p>On success a TopNKey built from the common prefix is copied down at the FK input and pushed
 * further; the original operator is removed from the top of the join only when all of its key
 * columns are covered by the prefix.
 *
 * @param topNKey TopNKey operator to push; its first parent is cast to a join operator
 * @param fkJoinInputIndex index of the FK-side input among the join's parents; that parent is
 *        cast to {@link ReduceSinkOperator}
 * @param nonFkSideIsFiltered when true, the method only logs and returns
 * @throws SemanticException when removeChildAndAdoptItsChildren was not successful
 */
private void pushdownInnerJoin(TopNKeyOperator topNKey, int fkJoinInputIndex, boolean nonFkSideIsFiltered)
    throws SemanticException {
  final CommonJoinOperator<? extends JoinDesc> join =
      (CommonJoinOperator<? extends JoinDesc>) topNKey.getParentOperators().get(0);
  // Resolved before the filter guard, exactly as before, so a bad index or type still fails fast.
  final ReduceSinkOperator fkSide = (ReduceSinkOperator) join.getParentOperators().get(fkJoinInputIndex);

  if (nonFkSideIsFiltered) {
    LOG.debug("Not pushing {} through {} as non FK side of the join is filtered", topNKey.getName(), join.getName());
    return;
  }

  final TopNKeyDesc tnkDesc = topNKey.getConf();
  final ReduceSinkDesc fkDesc = fkSide.getConf();
  final CommonKeyPrefix sharedPrefix = CommonKeyPrefix.map(
      mapUntilColumnEquals(tnkDesc.getKeyColumns(), join.getColumnExprMap()),
      tnkDesc.getColumnSortOrder(),
      tnkDesc.getNullOrder(),
      fkDesc.getKeyCols(),
      fkDesc.getColumnExprMap(),
      fkDesc.getOrder(),
      fkDesc.getNullOrder());

  if (sharedPrefix.isEmpty() || sharedPrefix.size() == tnkDesc.getPartitionKeyColumns().size()) {
    return;
  }

  LOG.debug("Pushing a copy of {} through {} and {}", topNKey.getName(), join.getName(), fkSide.getName());
  final TopNKeyDesc pushedDesc = tnkDesc.combine(sharedPrefix);
  pushdown((TopNKeyOperator) copyDown(fkSide, pushedDesc));

  // The operator above the join is redundant only if every one of its keys was pushed.
  final boolean allKeysPushed = tnkDesc.getKeyColumns().size() == sharedPrefix.size();
  if (allKeysPushed) {
    LOG.debug("Removing {} above {}", topNKey.getName(), join.getName());
    join.removeChildAndAdoptItsChildren(topNKey);
  }
}
Aggregations