use of org.apache.hadoop.hive.ql.plan.TopNKeyDesc in project hive by apache.
the class TopNKeyProcessor method process.
/**
 * Visits a ReduceSink operator and, when it carries an eligible top-n limit, builds a
 * {@link TopNKeyDesc} from the ReduceSink's configuration and hands it to {@code copyDown}.
 *
 * <p>The node is left untouched when any of the following holds:
 * <ul>
 *   <li>the ReduceSink has a negative topN or is not ordering,</li>
 *   <li>its topN exceeds {@code maxTopNAllowed},</li>
 *   <li>its first parent is already a {@link TopNKeyOperator},</li>
 *   <li>it is a PTF ReduceSink whose partition columns are not strictly fewer than its key
 *       columns.</li>
 * </ul>
 * After a successful {@code copyDown} the ReduceSink's topN is reset to {@code -1}.
 *
 * @param nd the node being visited; cast to {@link ReduceSinkOperator}
 * @param stack not used by this method
 * @param procCtx not used by this method
 * @param nodeOutputs not used by this method
 * @return always {@code null}
 * @throws SemanticException declared by the overridden method and by the calls made here
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
  final ReduceSinkOperator rsOp = (ReduceSinkOperator) nd;
  final ReduceSinkDesc rsDesc = rsOp.getConf();

  // A usable top-n requires a non-negative limit on an ordering reduce sink.
  final boolean carriesTopN = rsDesc.getTopN() >= 0 && rsDesc.isOrdering();
  if (!carriesTopN) {
    return null;
  }
  if (rsDesc.getTopN() > maxTopNAllowed) {
    return null;
  }

  // Do not stack a second TopNKey operator directly on top of an existing one.
  if (rsOp.getParentOperators().get(0) instanceof TopNKeyOperator) {
    return null;
  }

  // For PTF reduce sinks the partition columns are a prefix of the key columns; they must be
  // a strict prefix, otherwise we bail out.
  final List<ExprNodeDesc> partitionCols;
  if (!rsDesc.isPTFReduceSink()) {
    partitionCols = Collections.emptyList();
  } else if (rsDesc.getPartitionCols().size() < rsDesc.getKeyCols().size()) {
    partitionCols = rsDesc.getPartitionCols();
  } else {
    return null;
  }

  final TopNKeyDesc tnkDesc = new TopNKeyDesc(
      rsDesc.getTopN(),
      rsDesc.getOrder(),
      rsDesc.getNullOrder(),
      rsDesc.getKeyCols(),
      partitionCols,
      efficiencyThreshold,
      checkEfficiencyNumBatches,
      maxNumberOfPartitions);
  copyDown(rsOp, tnkDesc);

  // Reset the limit on the reduce sink to -1 (the first guard above treats a negative topN
  // as "no top n").
  rsDesc.setTopN(-1);
  return null;
}
use of org.apache.hadoop.hive.ql.plan.TopNKeyDesc in project hive by apache.
the class TopNKeyPushdownProcessor method pushdownThroughGroupBy.
/**
 * Tries to push a TopNKey operator through its parent GroupBy.
 *
 * <p>A {@link CommonKeyPrefix} is computed between the TopNKey descriptor and the GroupBy
 * descriptor. Nothing happens when that prefix is empty or when its size equals the number of
 * partition key columns of the TopNKey. Otherwise a copy of the TopNKey, combined with the
 * prefix, is placed via {@code copyDown(groupBy, ...)} and pushed further. If the prefix covers
 * all key columns of the original TopNKey, the original operator is removed from above the
 * GroupBy; otherwise it is kept.
 *
 * <p>Intended for GroupBy without grouping sets; this method itself does not check for them.
 *
 * @param topNKey TopNKey operator to push; its first parent must be a {@link GroupByOperator}
 * @throws SemanticException when removeChildAndAdoptItsChildren was not successful
 */
private void pushdownThroughGroupBy(TopNKeyOperator topNKey) throws SemanticException {
  final GroupByOperator gbyOp = (GroupByOperator) topNKey.getParentOperators().get(0);
  final GroupByDesc gbyDesc = gbyOp.getConf();
  final TopNKeyDesc tnkDesc = topNKey.getConf();

  final CommonKeyPrefix sharedPrefix = CommonKeyPrefix.map(tnkDesc, gbyDesc);
  if (sharedPrefix.isEmpty()) {
    // No common keys at all: nothing can be pushed.
    return;
  }
  if (sharedPrefix.size() == tnkDesc.getPartitionKeyColumns().size()) {
    // The shared prefix is exactly as long as the partition key list: do not push.
    return;
  }

  LOG.debug("Pushing a copy of {} through {}", topNKey.getName(), gbyOp.getName());
  final TopNKeyDesc pushedDesc = tnkDesc.combine(sharedPrefix);
  final TopNKeyOperator pushedCopy = (TopNKeyOperator) copyDown(gbyOp, pushedDesc);
  pushdown(pushedCopy);

  // When every key of the original TopNKey is part of the shared prefix, the original
  // operator above the GroupBy is dropped.
  final boolean prefixCoversAllKeys = tnkDesc.getKeyColumns().size() == sharedPrefix.size();
  if (prefixCoversAllKeys) {
    LOG.debug("Removing {} above {}", topNKey.getName(), gbyOp.getName());
    gbyOp.removeChildAndAdoptItsChildren(topNKey);
  }
}
use of org.apache.hadoop.hive.ql.plan.TopNKeyDesc in project hive by apache.
the class Vectorizer method vectorizeTopNKeyOperator.
/**
 * Builds the vectorized counterpart of a TopNKey operator.
 *
 * <p>Vector expressions are derived for both the key columns and the partition key columns of
 * the operator's {@link TopNKeyDesc}; only after both have been obtained are they stored on
 * {@code vectorTopNKeyDesc}. The vector operator is then created through
 * {@link OperatorFactory#getVectorOperator}.
 *
 * @param topNKeyOperator operator whose configuration is a {@link TopNKeyDesc}
 * @param vContext vectorization context used to derive the vector expressions
 * @param vectorTopNKeyDesc descriptor that receives the key and partition key expressions
 * @return the operator returned by {@code OperatorFactory.getVectorOperator}
 * @throws HiveException propagated from the calls made here
 */
private static Operator<? extends OperatorDesc> vectorizeTopNKeyOperator(
    Operator<? extends OperatorDesc> topNKeyOperator,
    VectorizationContext vContext,
    VectorTopNKeyDesc vectorTopNKeyDesc) throws HiveException {
  final TopNKeyDesc desc = (TopNKeyDesc) topNKeyOperator.getConf();

  // Resolve both expression arrays first so the descriptor is only modified once both succeed.
  final VectorExpression[] keyExprs =
      getVectorExpressions(vContext, desc.getKeyColumns());
  final VectorExpression[] partitionKeyExprs =
      getVectorExpressions(vContext, desc.getPartitionKeyColumns());

  vectorTopNKeyDesc.setKeyExpressions(keyExprs);
  vectorTopNKeyDesc.setPartitionKeyColumns(partitionKeyExprs);

  return OperatorFactory.getVectorOperator(
      topNKeyOperator.getCompilationOpContext(), desc, vContext, vectorTopNKeyDesc);
}
use of org.apache.hadoop.hive.ql.plan.TopNKeyDesc in project hive by apache.
the class TopNKeyPushdownProcessor method pushdownThroughTopNKey.
/**
 * Push through another Top N Key operator, i.e. the first parent of <code>topNKey</code> is
 * itself a Top N Key operator.
 * <ul>
 * <li>If both operators have the same descriptor (see {@link TopNKeyDesc#isSame}),
 * <code>topNKey</code> is removed and nothing else is done.</li>
 * <li>Otherwise a {@link CommonKeyPrefix} is computed from the key columns, sort order and null
 * order of both operators.</li>
 * <li>If the topN property is the same:
 *   <ul>
 *   <li>if all keys of <code>topNKey</code> are in the common prefix, <code>topNKey</code> is
 *   pushed through its parent; afterwards, if its first child is a Top N Key operator,
 *   <code>topNKey</code> is removed from its current parent;</li>
 *   <li>else if all keys of the parent are in the common prefix, the parent is removed.</li>
 *   </ul>
 * </li>
 * <li>If the topN property differs and both operators' key lists have the size of the common
 * prefix (same keys), the operator with the higher topN value is removed.</li>
 * <li>In every other case the plan is left unchanged.</li>
 * </ul>
 * @param topNKey TopNKey operator to push; its first parent must be a {@link TopNKeyOperator}
 * @throws SemanticException when removeChildAndAdoptItsChildren was not successful
 */
private void pushdownThroughTopNKey(TopNKeyOperator topNKey) throws SemanticException {
TopNKeyOperator parent = (TopNKeyOperator) topNKey.getParentOperators().get(0);
// Identical descriptors: the child is redundant, drop it and stop.
if (hasSameTopNKeyDesc(parent, topNKey.getConf())) {
LOG.debug("Removing {} above same operator: {}", topNKey.getName(), parent.getName());
parent.removeChildAndAdoptItsChildren(topNKey);
return;
}
TopNKeyDesc topNKeyDesc = topNKey.getConf();
TopNKeyDesc parentTopNKeyDesc = parent.getConf();
// Prefix shared by both operators, taking key columns, sort order and null order into account.
CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(topNKeyDesc.getKeyColumns(), topNKeyDesc.getColumnSortOrder(), topNKeyDesc.getNullOrder(), parentTopNKeyDesc.getKeyColumns(), parentTopNKeyDesc.getColumnSortOrder(), parentTopNKeyDesc.getNullOrder());
if (topNKeyDesc.getTopN() == parentTopNKeyDesc.getTopN()) {
// Same topN: decide based on which key list is fully contained in the common prefix.
if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
// TNK keys are a subset of the parent TNK keys
pushdownThroughParent(topNKey);
// The parent of topNKey is re-read below because pushdownThroughParent may have rewired the
// tree (presumably it moves topNKey past the old parent; verify against its implementation).
if (topNKey.getChildOperators().get(0).getType() == OperatorType.TOPNKEY) {
// NOTE(review): the log message names `parent` as the removed operator, while the call
// below removes `topNKey` from its current parent. Confirm which one is intended.
LOG.debug("Removing {} since child {} supersedes it", parent.getName(), topNKey.getName());
topNKey.getParentOperators().get(0).removeChildAndAdoptItsChildren(topNKey);
}
} else if (parentTopNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
// parent TNK keys are a subset of the TNK keys: remove the parent from its own parent
LOG.debug("Removing parent of {} since it supersedes", topNKey.getName());
parent.getParentOperators().get(0).removeChildAndAdoptItsChildren(parent);
}
} else if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size() && parentTopNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) {
// Different topN but both key lists match the common prefix in size: keep only the operator
// with the lower topN. Equality of topN cannot occur here; it is handled by the branch above.
if (topNKeyDesc.getTopN() > parentTopNKeyDesc.getTopN()) {
LOG.debug("Removing {}. Parent {} has same keys but lower topN {} > {}", topNKey.getName(), parent.getName(), topNKeyDesc.getTopN(), parentTopNKeyDesc.getTopN());
topNKey.getParentOperators().get(0).removeChildAndAdoptItsChildren(topNKey);
} else {
LOG.debug("Removing parent {}. {} has same keys but lower topN {} < {}", parent.getName(), topNKey.getName(), topNKeyDesc.getTopN(), parentTopNKeyDesc.getTopN());
parent.getParentOperators().get(0).removeChildAndAdoptItsChildren(parent);
}
}
}
use of org.apache.hadoop.hive.ql.plan.TopNKeyDesc in project hive by apache.
the class TopNKeyPushdownProcessor method hasSameTopNKeyDesc.
/**
 * Tells whether {@code operator} is a {@link TopNKeyOperator} whose descriptor is the same as
 * {@code desc} according to {@link TopNKeyDesc#isSame}.
 *
 * @param operator operator to inspect; any operator type is accepted
 * @param desc descriptor to compare against
 * @return {@code false} when {@code operator} is not a {@link TopNKeyOperator}; otherwise the
 *         result of {@code isSame} on the operator's descriptor
 */
private static boolean hasSameTopNKeyDesc(Operator<? extends OperatorDesc> operator, TopNKeyDesc desc) {
  // Short-circuit keeps the cast safe: it is only reached for TopNKeyOperator instances.
  return operator instanceof TopNKeyOperator
      && ((TopNKeyOperator) operator).getConf().isSame(desc);
}
Aggregations