use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
the class SetSparkReducerParallelism method needSetParallelism.
// tests whether the RS needs its parallelism set automatically
private boolean needSetParallelism(ReduceSinkOperator reduceSink, HiveConf hiveConf) {
  ReduceSinkDesc desc = reduceSink.getConf();
  if (desc.getNumReducers() <= 0) {
    return true;
  }
  if (desc.getNumReducers() == 1 && desc.hasOrderBy() &&
      hiveConf.getBoolVar(HiveConf.ConfVars.HIVESAMPLINGFORORDERBY) && !desc.isDeduplicated()) {
    Stack<Operator<? extends OperatorDesc>> descendants = new Stack<Operator<? extends OperatorDesc>>();
    List<Operator<? extends OperatorDesc>> children = reduceSink.getChildOperators();
    if (children != null) {
      for (Operator<? extends OperatorDesc> child : children) {
        descendants.push(child);
      }
    }
    while (!descendants.isEmpty()) {
      Operator<? extends OperatorDesc> descendant = descendants.pop();
      // If the descendants contain a LimitOperator, return false
      if (descendant instanceof LimitOperator) {
        return false;
      }
      boolean reachTerminalOperator = (descendant instanceof TerminalOperator);
      if (!reachTerminalOperator) {
        List<Operator<? extends OperatorDesc>> childrenOfDescendant = descendant.getChildOperators();
        if (childrenOfDescendant != null) {
          for (Operator<? extends OperatorDesc> childOfDescendant : childrenOfDescendant) {
            descendants.push(childOfDescendant);
          }
        }
      }
    }
    return true;
  }
  return false;
}
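The descendant scan is an iterative depth-first walk over the operator DAG: push the RS's children, pop until a LimitOperator short-circuits the answer or a TerminalOperator ends a branch. A minimal self-contained sketch of the same pattern (PlanNode and hasLimitBelow are illustrative names, not Hive API):

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.List;

final class PlanNode {
  final boolean isLimit;
  final List<PlanNode> children;

  PlanNode(boolean isLimit, List<PlanNode> children) {
    this.isLimit = isLimit;
    this.children = children;
  }

  // Iterative DFS mirroring the Stack-based scan in needSetParallelism:
  // any LIMIT reachable from the root short-circuits to true.
  static boolean hasLimitBelow(PlanNode root) {
    Deque<PlanNode> stack = new ArrayDeque<>();
    for (PlanNode c : root.children) {
      stack.push(c);
    }
    while (!stack.isEmpty()) {
      PlanNode n = stack.pop();
      if (n.isLimit) {
        return true;
      }
      for (PlanNode c : n.children) {
        stack.push(c);
      }
    }
    return false;
  }
}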
use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
the class TopNKeyProcessor method process.
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
  // Get ReduceSinkOperator
  ReduceSinkOperator reduceSinkOperator = (ReduceSinkOperator) nd;
  ReduceSinkDesc reduceSinkDesc = reduceSinkOperator.getConf();
  // Check whether the reduce sink operator contains top n
  if (reduceSinkDesc.getTopN() < 0 || !reduceSinkDesc.isOrdering()) {
    return null;
  }
  if (reduceSinkDesc.getTopN() > maxTopNAllowed) {
    return null;
  }
  // Check whether there already is a top n key operator
  Operator<? extends OperatorDesc> parentOperator = reduceSinkOperator.getParentOperators().get(0);
  if (parentOperator instanceof TopNKeyOperator) {
    return null;
  }
  List<ExprNodeDesc> partitionCols = Collections.emptyList();
  if (reduceSinkDesc.isPTFReduceSink()) {
    // Note: partition cols are a prefix of the key cols
    if (reduceSinkDesc.getPartitionCols().size() >= reduceSinkDesc.getKeyCols().size()) {
      return null;
    }
    partitionCols = reduceSinkDesc.getPartitionCols();
  }
  TopNKeyDesc topNKeyDesc = new TopNKeyDesc(reduceSinkDesc.getTopN(), reduceSinkDesc.getOrder(),
      reduceSinkDesc.getNullOrder(), reduceSinkDesc.getKeyCols(), partitionCols,
      efficiencyThreshold, checkEfficiencyNumBatches, maxNumberOfPartitions);
  copyDown(reduceSinkOperator, topNKeyDesc);
  reduceSinkDesc.setTopN(-1);
  return null;
}
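For context, a processor like this is normally attached to the plan through Hive's rule-based graph walker. A hedged sketch of that wiring (class names follow the pre-4.0 org.apache.hadoop.hive.ql.lib API, and the TopNKeyProcessor constructor arguments are assumed to mirror the thresholds used above; both vary across Hive versions):

// Match any ReduceSinkOperator node and hand it to the processor.
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<>();
opRules.put(new RuleRegExp("Top n key pushdown", ReduceSinkOperator.getOperatorName() + "%"),
    new TopNKeyProcessor(maxTopNAllowed, efficiencyThreshold, checkEfficiencyNumBatches, maxNumberOfPartitions));
Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
GraphWalker walker = new DefaultGraphWalker(disp);
// Start from the root table scans of the parse context.
walker.startWalking(new ArrayList<Node>(pctx.getTopOps().values()), null);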
use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
the class TestGenTezWork method setUp.
/**
 * @throws java.lang.Exception
 */
@SuppressWarnings("unchecked")
@Before
public void setUp() throws Exception {
  // Init conf
  final HiveConf conf = new HiveConf(SemanticAnalyzer.class);
  SessionState.start(conf);
  // Init parse context
  final ParseContext pctx = new ParseContext();
  pctx.setContext(new Context(conf));
  ctx = new GenTezProcContext(conf, pctx, Collections.EMPTY_LIST, new ArrayList<Task<?>>(),
      Collections.EMPTY_SET, Collections.EMPTY_SET);
  proc = new GenTezWork(new GenTezUtils() {
    @Override
    protected void setupMapWork(MapWork mapWork, GenTezProcContext context, PrunedPartitionList partitions,
        TableScanOperator root, String alias) throws SemanticException {
      LinkedHashMap<String, Operator<? extends OperatorDesc>> map =
          new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
      map.put("foo", root);
      mapWork.setAliasToWork(map);
    }
  });
  CompilationOpContext cCtx = new CompilationOpContext();
  fs = new FileSinkOperator(cCtx);
  fs.setConf(new FileSinkDesc());
  rs = new ReduceSinkOperator(cCtx);
  rs.setConf(new ReduceSinkDesc());
  TableDesc tableDesc = new TableDesc();
  tableDesc.setProperties(new Properties());
  rs.getConf().setKeySerializeInfo(tableDesc);
  ts = new TableScanOperator(cCtx);
  ts.setConf(new TableScanDesc(null));
  // Wire up a simple operator pipeline: TS -> RS -> FS
  ts.getChildOperators().add(rs);
  rs.getParentOperators().add(ts);
  rs.getChildOperators().add(fs);
  fs.getParentOperators().add(rs);
  ctx.preceedingWork = null;
  ctx.currentRootOperator = ts;
}
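With this fixture, a test can drive GenTezWork directly over the TS -> RS -> FS chain. A hypothetical example of such a test (the assertions are illustrative, not copied from TestGenTezWork):

@Test
public void testCreatesWorkForSimpleChain() throws SemanticException {
  // Processing the RS should have GenTezWork open a MapWork for the
  // root TableScanOperator via the stubbed setupMapWork above.
  proc.process(rs, null, ctx, (Object[]) null);
  assertNotNull(ctx.currentTask);
  assertEquals(1, ctx.currentTask.getWork().getAllWork().size());
}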
use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
the class SharedWorkOptimizer method compareOperator.
private static boolean compareOperator(ParseContext pctx, Operator<?> op1, Operator<?> op2) throws SemanticException {
  if (!op1.getClass().getName().equals(op2.getClass().getName())) {
    return false;
  }
  // TODO: move this to logicalEquals
  if (op1 instanceof ReduceSinkOperator) {
    ReduceSinkDesc op1Conf = ((ReduceSinkOperator) op1).getConf();
    ReduceSinkDesc op2Conf = ((ReduceSinkOperator) op2).getConf();
    return StringUtils.equals(op1Conf.getKeyColString(), op2Conf.getKeyColString())
        && StringUtils.equals(op1Conf.getValueColsString(), op2Conf.getValueColsString())
        && StringUtils.equals(op1Conf.getParitionColsString(), op2Conf.getParitionColsString())
        && op1Conf.getTag() == op2Conf.getTag()
        && StringUtils.equals(op1Conf.getOrder(), op2Conf.getOrder())
        && StringUtils.equals(op1Conf.getNullOrder(), op2Conf.getNullOrder())
        && op1Conf.getTopN() == op2Conf.getTopN()
        && canDeduplicateReduceTraits(op1Conf, op2Conf);
  }
  // TODO: move this to logicalEquals
  if (op1 instanceof TableScanOperator) {
    TableScanOperator tsOp1 = (TableScanOperator) op1;
    TableScanOperator tsOp2 = (TableScanOperator) op2;
    TableScanDesc op1Conf = tsOp1.getConf();
    TableScanDesc op2Conf = tsOp2.getConf();
    Table tableMeta1 = op1Conf.getTableMetadata();
    Table tableMeta2 = op2Conf.getTableMetadata();
    return StringUtils.equals(tableMeta1.getFullyQualifiedName(), tableMeta2.getFullyQualifiedName())
        && op1Conf.getNeededColumns().equals(op2Conf.getNeededColumns())
        && StringUtils.equals(op1Conf.getFilterExprString(), op2Conf.getFilterExprString())
        && pctx.getPrunedPartitions(tsOp1).getPartitions().equals(pctx.getPrunedPartitions(tsOp2).getPartitions())
        && op1Conf.getRowLimit() == op2Conf.getRowLimit()
        && Objects.equals(op1Conf.getIncludedBuckets(), op2Conf.getIncludedBuckets())
        && Objects.equals(op1Conf.getOpProps(), op2Conf.getOpProps());
  }
  return op1.logicalEquals(op2);
}
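SharedWorkOptimizer uses this check to decide whether two plan branches are mergeable. A simplified schematic of a call site (variable names are illustrative, not the optimizer's actual control flow):

// Candidate pair of equivalent table scans found while scanning the plan.
if (compareOperator(pctx, retainableTsOp, discardableTsOp)) {
  // Descriptors match field-by-field: redirect discardableTsOp's children
  // onto retainableTsOp and drop the duplicate scan from the plan.
}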
use of org.apache.hadoop.hive.ql.plan.ReduceSinkDesc in project hive by apache.
the class ParallelEdgeFixer method fixParallelEdge.
/**
 * Fixes a parallel edge going into a mapjoin by introducing a concentrator RS.
 */
private void fixParallelEdge(Operator<? extends OperatorDesc> p, Operator<?> o) throws SemanticException {
  LOG.info("Fixing parallel edge by adding a concentrator RS between {} -> {}", p, o);
  ReduceSinkDesc conf = (ReduceSinkDesc) p.getConf();
  ReduceSinkDesc newConf = (ReduceSinkDesc) conf.clone();
  Operator<SelectDesc> newSEL = buildSEL(p, conf);
  Operator<ReduceSinkDesc> newRS = OperatorFactory.getAndMakeChild(p.getCompilationOpContext(), newConf, new ArrayList<>());
  conf.setOutputName("forward_to_" + newRS);
  conf.setTag(0);
  newConf.setKeyCols(new ArrayList<>(conf.getKeyCols()));
  newRS.setSchema(new RowSchema(p.getSchema()));
  // Rewire the edge: p -> newSEL -> newRS -> o
  p.replaceChild(o, newSEL);
  newSEL.setParentOperators(Lists.<Operator<?>>newArrayList(p));
  newSEL.setChildOperators(Lists.<Operator<?>>newArrayList(newRS));
  newRS.setParentOperators(Lists.<Operator<?>>newArrayList(newSEL));
  newRS.setChildOperators(Lists.<Operator<?>>newArrayList(o));
  o.replaceParent(p, newRS);
}
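The net effect of the rewiring is easiest to see as a before/after picture of the edge (schematic only; buildSEL, which is not shown here, produces the intermediate SELECT):

// before:  p (RS) ---------------------> o (mapjoin)
// after:   p (RS) -> newSEL -> newRS --> o (mapjoin)
//
// newRS carries a clone of p's ReduceSinkDesc with the same key columns, so o
// now receives p's rows through a single concentrator edge instead of a
// parallel one.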