use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class TezCompiler method findParallelSemiJoinBranch.
/**
 * Looks upstream of the ReduceSink parents of {@code mapjoin} for branches that run in
 * parallel to it: either dynamic partition pruning (DPP) event branches or semijoin
 * branches that target {@code bigTableTS}.
 *
 * <p>For each ReduceSink parent the walk moves up the parent chain until it reaches a
 * ReduceSink, a TableScan, or an operator with more than one child (a branch point).
 * Only a branch point is inspected further; its Select children are followed down to
 * their terminal operator.
 *
 * @param mapjoin operator whose ReduceSink parents are the starting points of the walk
 * @param bigTableTS table scan a semijoin branch must target to be counted
 * @param parseContext supplies the ReduceSink to SemiJoinBranchInfo mapping
 * @param semijoins output map; receives every matching semijoin ReduceSink (with its
 *        target table scan) that is not hint-created and is marked as removable
 * @return {@code true} if at least one parallel DPP branch or one semijoin branch
 *         targeting {@code bigTableTS} was found
 */
private boolean findParallelSemiJoinBranch(Operator<?> mapjoin, TableScanOperator bigTableTS, ParseContext parseContext, Map<ReduceSinkOperator, TableScanOperator> semijoins) {
  boolean parallelEdges = false;
  for (Operator<?> op : mapjoin.getParentOperators()) {
    if (!(op instanceof ReduceSinkOperator)) {
      continue;
    }
    op = op.getParentOperators().get(0);
    // Follow the ReduceSink operator upstream, which is on the small table side.
    while (!(op instanceof ReduceSinkOperator) && !(op instanceof TableScanOperator) && !(op.getChildren() != null && op.getChildren().size() > 1)) {
      if (op instanceof MapJoinOperator) {
        // Stay in the current pipeline: pick the parent that is not a ReduceSink.
        // As before, the last such parent wins if there happen to be several.
        Operator<?> pipelineParent = null;
        for (Operator<?> parentOp : op.getParentOperators()) {
          if (!(parentOp instanceof ReduceSinkOperator)) {
            pipelineParent = parentOp;
          }
        }
        if (pipelineParent != null) {
          // Re-evaluate the loop condition on the selected parent itself. Previously the
          // walk stepped straight past it, so a branch point directly above a map join
          // was missed and a TableScan there was dereferenced for its first parent.
          op = pipelineParent;
          continue;
        }
      }
      op = op.getParentOperators().get(0);
    }
    // Bail out if RS or TS is encountered.
    if (op instanceof ReduceSinkOperator || op instanceof TableScanOperator) {
      continue;
    }
    // A branch is hit.
    for (Node nd : op.getChildren()) {
      if (!(nd instanceof SelectOperator)) {
        continue;
      }
      // Descend along first children to the terminal operator of this branch.
      Operator<?> child = (Operator<?>) nd;
      while (!child.getChildOperators().isEmpty()) {
        child = child.getChildOperators().get(0);
      }
      if (!(child instanceof ReduceSinkOperator)) {
        // Not a semijoin branch, but it could still be DPP.
        if (child instanceof AppMasterEventOperator && ((AppMasterEventOperator) child).getConf() instanceof DynamicPruningEventDesc) {
          // DPP indeed, so a parallel edge exists.
          parallelEdges = true;
        }
        continue;
      }
      ReduceSinkOperator rs = (ReduceSinkOperator) child;
      SemiJoinBranchInfo sjInfo = parseContext.getRsToSemiJoinBranchInfo().get(rs);
      if (sjInfo == null) {
        continue;
      }
      TableScanOperator ts = sjInfo.getTsOp();
      if (ts != bigTableTS) {
        // Targets a different table scan; not the one we are looking for.
        continue;
      }
      parallelEdges = true;
      if (sjInfo.getIsHint() || !sjInfo.getShouldRemove()) {
        // Created by hint or not marked for removal: count the edge but keep the branch.
        continue;
      }
      // Add the semijoin branch to the map.
      semijoins.put(rs, ts);
    }
  }
  return parallelEdges;
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class TezCompiler method removeSemijoinsParallelToMapJoin.
/*
 * Starting from every top operator, walks the operator pipeline depth-first until a
 * ReduceSink is hit. Each map join met on the way is checked for a parallel semijoin
 * edge or dynamic partition pruning; the semijoin branches collected by that check
 * are removed afterwards.
 */
private void removeSemijoinsParallelToMapJoin(OptimizeTezProcContext procCtx) throws SemanticException {
  // Nothing to do unless semijoin reduction and join conversion are both on and
  // semijoin reduction for map joins is off.
  if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) {
    return;
  }
  if (!procCtx.conf.getBoolVar(ConfVars.HIVECONVERTJOIN)) {
    return;
  }
  if (procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_MAPJOIN)) {
    return;
  }

  // Snapshot of all the top (table scan) operators.
  List<Operator<?>> roots = new ArrayList<>(procCtx.parseContext.getTopOps().values());
  Map<ReduceSinkOperator, TableScanOperator> semijoins = new HashMap<>();

  for (Operator<?> root : roots) {
    // A table scan can fan out into several branches (DPP or semijoin optimization),
    // so keep a stack of operators still to visit and stop each branch at a ReduceSink.
    Deque<Operator<?>> pending = new LinkedList<>();
    pending.add(root);
    while (!pending.isEmpty()) {
      Operator<?> current = pending.pollLast();
      if (current instanceof ReduceSinkOperator) {
        // This branch is finished.
        continue;
      }
      if (current instanceof MapJoinOperator
          && !findParallelSemiJoinBranch(current, (TableScanOperator) root, procCtx.parseContext, semijoins)) {
        // No parallel edge for this map join: abandon the rest of this root's pipeline.
        break;
      }
      pending.addAll(current.getChildOperators());
    }
  }

  for (Map.Entry<ReduceSinkOperator, TableScanOperator> semijoin : semijoins.entrySet()) {
    ReduceSinkOperator rs = semijoin.getKey();
    TableScanOperator ts = semijoin.getValue();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Semijoin optimization with parallel edge to map join. Removing semijoin " + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
    }
    GenTezUtils.removeBranch(rs);
    GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, rs, ts);
  }
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class TezCompiler method setInputFormat.
/**
 * Applies the two-argument {@code setInputFormat} overload to every operator registered
 * in the alias-to-work map of each MapWork of a Tez task, then recurses into the
 * sub-tasks of a conditional task and into the child tasks of any task.
 *
 * @param task root of the task tree to process
 */
@Override
protected void setInputFormat(Task<? extends Serializable> task) {
  if (task instanceof TezTask) {
    for (BaseWork unit : ((TezTask) task).getWork().getAllWork()) {
      if (!(unit instanceof MapWork)) {
        continue;
      }
      MapWork mapWork = (MapWork) unit;
      // Iterating an empty map is a no-op, so no explicit emptiness check is needed.
      for (Operator<? extends OperatorDesc> rootOp : mapWork.getAliasToWork().values()) {
        setInputFormat(mapWork, rootOp);
      }
    }
  } else if (task instanceof ConditionalTask) {
    for (Task<? extends Serializable> alternative : ((ConditionalTask) task).getListTasks()) {
      setInputFormat(alternative);
    }
  }

  if (task.getChildTasks() == null) {
    return;
  }
  for (Task<? extends Serializable> next : task.getChildTasks()) {
    setInputFormat(next);
  }
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class TezCompiler method runRemoveDynamicPruningOptimization.
/**
 * Walks the operator graph from the parse context's top operators and dispatches
 * {@code RemoveDynamicPruningBySize} for nodes matching the AppMasterEventOperator rule.
 *
 * @param procCtx optimization context; provides the parse context and is handed to the
 *        dispatcher as the processor context
 * @param inputs not used by this method
 * @param outputs not used by this method
 * @throws SemanticException declared for the graph walk
 */
private void runRemoveDynamicPruningOptimization(OptimizeTezProcContext procCtx, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
  // Single rule: the operator name of AppMasterEventOperator followed by "%".
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("Remove dynamic pruning by size", AppMasterEventOperator.getOperatorName() + "%"), new RemoveDynamicPruningBySize());
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along.
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
  // The walk starts at the top operators. The separate deque that used to be filled
  // with the same operators was never read and has been dropped.
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(procCtx.parseContext.getTopOps().values());
  GraphWalker ogw = new ForwardWalker(disp);
  ogw.startWalking(topNodes, null);
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class HiveOpConverter method translateJoin.
/**
 * Converts a join {@code RelNode} into a Hive join operator.
 *
 * <p>Steps: dispatch every input, tag the resulting ReduceSink operators with their
 * input position, merge the inputs' virtual-column positions, read the join keys from
 * the inputs' {@code HiveSortExchange}, convert the remaining join filters to
 * expression nodes, and build the join operator via {@code genJoin}.
 *
 * @param joinRel join node to translate; its inputs must be {@code HiveSortExchange}s
 * @return the join operator with its derived table alias and virtual-column set
 * @throws SemanticException if {@code joinRel} is not a HiveJoin, HiveMultiJoin or
 *         HiveSemiJoin
 */
private OpAttr translateJoin(RelNode joinRel) throws SemanticException {
  final int inputCount = joinRel.getInputs().size();

  // 0. Data structures needed for the join optimization through Hive.
  String[] sourceAliases = new String[inputCount];
  String joinAlias = getHiveDerivedTableAlias();

  // 1. Convert each input and remember its first operator and table alias.
  OpAttr[] convertedInputs = new OpAttr[inputCount];
  List<Operator<?>> joinParents = new ArrayList<Operator<?>>(inputCount);
  for (int pos = 0; pos < inputCount; pos++) {
    OpAttr converted = dispatch(joinRel.getInput(pos));
    convertedInputs[pos] = converted;
    joinParents.add(converted.inputs.get(0));
    sourceAliases[pos] = converted.tabAlias;
  }

  // 2. Tag every parent ReduceSink with its position among the join inputs.
  int nextTag = 0;
  for (Operator<?> joinParent : joinParents) {
    ((ReduceSinkOperator) joinParent).getConf().setTag(nextTag++);
  }

  // 3. Virtual columns: start from the first input; unless this is a SemiJoin that is
  //    not a HiveMultiJoin, add the other inputs' positions shifted by the combined
  //    width of the inputs before them.
  Set<Integer> joinVcols = new HashSet<Integer>(convertedInputs[0].vcolsInCalcite);
  if (joinRel instanceof HiveMultiJoin || !(joinRel instanceof SemiJoin)) {
    int offset = convertedInputs[0].inputs.get(0).getSchema().getSignature().size();
    for (int pos = 1; pos < inputCount; pos++) {
      OpAttr current = convertedInputs[pos];
      joinVcols.addAll(HiveCalciteUtil.shiftVColsSet(current.vcolsInCalcite, offset));
      offset += current.inputs.get(0).getSchema().getSignature().size();
    }
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + joinRel.getId() + ":" + joinRel.getRelTypeName() + " with row type: [" + joinRel.getRowType() + "]");
  }

  // 4. Join key expressions come from the HiveSortExchange feeding each input.
  ExprNodeDesc[][] joinKeys = new ExprNodeDesc[inputCount][];
  for (int pos = 0; pos < inputCount; pos++) {
    HiveSortExchange exchange = (HiveSortExchange) joinRel.getInput(pos);
    joinKeys[pos] = exchange.getJoinExpressions();
  }

  // 5. The rest of the join predicate (conditions that are not part of the join key)
  //    is turned into filter expressions.
  final List<RexNode> joinFilters;
  if (joinRel instanceof HiveJoin) {
    joinFilters = ImmutableList.of(((HiveJoin) joinRel).getJoinFilter());
  } else if (joinRel instanceof HiveMultiJoin) {
    joinFilters = ((HiveMultiJoin) joinRel).getJoinFilters();
  } else if (joinRel instanceof HiveSemiJoin) {
    joinFilters = ImmutableList.of(((HiveSemiJoin) joinRel).getJoinFilter());
  } else {
    throw new SemanticException("Can't handle join type: " + joinRel.getClass().getName());
  }

  List<List<ExprNodeDesc>> filterExpressions = Lists.newArrayList();
  for (RexNode joinFilter : joinFilters) {
    List<ExprNodeDesc> convertedConjuncts = new ArrayList<ExprNodeDesc>();
    if (joinFilter != null) {
      for (RexNode conjunct : RelOptUtil.conjunctions(joinFilter)) {
        convertedConjuncts.add(convertToExprNode(conjunct, joinRel, null, joinVcols));
      }
    }
    // A null filter still contributes an (empty) entry so positions stay aligned.
    filterExpressions.add(convertedConjuncts);
  }

  // 6. Generate the join operator.
  JoinOperator joinOp = genJoin(joinRel, joinKeys, filterExpressions, joinParents, sourceAliases, joinAlias);

  // 7. Return the result.
  return new OpAttr(joinAlias, joinVcols, joinOp);
}
Aggregations