Use of org.apache.hadoop.hive.ql.lib.PreOrderWalker in the Apache Hive project.
From the class SparkCompiler, method runSetReducerParallelism.
/**
 * Walks the operator graph, starting at the parse context's top operators, with a
 * single rule that pairs ReduceSink operators with {@code SetSparkReducerParallelism}.
 *
 * @param procCtx optimization context; provides the parse context and is passed to the
 *                dispatcher as the processing context
 * @throws SemanticException if raised while walking the operator graph
 */
private void runSetReducerParallelism(OptimizeSparkProcContext procCtx) throws SemanticException {
    ParseContext parseContext = procCtx.getParseContext();

    // Rule table: operator-name pattern -> processor to run on a match.
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    Rule reduceSinkRule =
        new RuleRegExp("Set parallelism - ReduceSink", ReduceSinkOperator.getOperatorName() + "%");
    NodeProcessor parallelismSetter = new SetSparkReducerParallelism(parseContext.getConf());
    rules.put(reduceSinkRule, parallelismSetter);

    // The dispatcher fires the processor of the closest matching rule and hands it procCtx.
    // No default processor is registered (null).
    Dispatcher dispatcher = new DefaultRuleDispatcher(null, rules, procCtx);
    GraphWalker walker = new PreOrderWalker(dispatcher);

    // Start nodes for the walk: every top operator known to the parse context.
    ArrayList<Node> startNodes = new ArrayList<Node>(parseContext.getTopOps().values());
    walker.startWalking(startNodes, null);
}
Use of org.apache.hadoop.hive.ql.lib.PreOrderWalker in the Apache Hive project.
From the class SparkSkewJoinResolver, method resolve.
/**
 * Clears the collection returned by {@code SparkSkewJoinProcFactory.getVisitedJoinOp()},
 * then walks the root tasks of the physical plan with a
 * {@code SparkSkewJoinTaskDispatcher}.
 *
 * @param pctx physical context whose root tasks are walked
 * @return the same {@code pctx} instance that was passed in
 * @throws SemanticException if raised while walking the task graph
 */
@Override
public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
    SparkSkewJoinProcFactory.getVisitedJoinOp().clear();

    // Pre-order traversal is used because the current task may be split during the walk.
    GraphWalker walker = new PreOrderWalker(new SparkSkewJoinTaskDispatcher(pctx));

    // Copy the root tasks into the list of start nodes.
    ArrayList<Node> rootNodes = new ArrayList<Node>(pctx.getRootTasks());
    walker.startWalking(rootNodes, null);

    return pctx;
}
Use of org.apache.hadoop.hive.ql.lib.PreOrderWalker in the Apache Hive project.
From the class MacroSemanticAnalyzer, method analyzeCreateMacro.
/**
 * Analyzes a create-macro AST node and registers a task that creates the macro.
 *
 * <p>Child 0 of {@code ast} supplies the macro name and child 1 is read as the
 * argument column list. When that list is empty, child 1 is also used as the macro
 * body; otherwise child 2 is the body. For macros with arguments, the body is walked
 * to collect every referenced column name, and that set must equal the set of
 * declared argument names.
 *
 * @param ast root node of the create-macro statement
 * @throws SemanticException if the name is qualified, if the referenced columns differ
 *         from the declared arguments, or if an argument name is declared more than once
 */
@SuppressWarnings("unchecked")
private void analyzeCreateMacro(ASTNode ast) throws SemanticException {
    String macroName = ast.getChild(0).getText();
    // Temp macros are not allowed to have qualified names.
    if (FunctionUtils.isQualifiedFunctionName(macroName)) {
        throw new SemanticException("Temporary macro cannot be created with a qualified name.");
    }

    List<FieldSchema> params = BaseSemanticAnalyzer.getColumns((ASTNode) ast.getChild(1), true);
    boolean hasNoParams = params.isEmpty();

    // Column names referenced by the macro body; filled in by the walk below.
    final Set<String> referencedColumns = new HashSet<String>();
    if (!hasNoParams) {
        Node expression = (Node) ast.getChild(2);
        // Visit every node of the body and record the name under each TOK_TABLE_OR_COL.
        Dispatcher columnCollector = new Dispatcher() {
            @Override
            public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
                if (!(nd instanceof ASTNode)) {
                    return null;
                }
                ASTNode astNode = (ASTNode) nd;
                if (astNode.getType() == HiveParser.TOK_TABLE_OR_COL) {
                    referencedColumns.add(astNode.getChild(0).getText());
                }
                return null;
            }
        };
        new PreOrderWalker(columnCollector).startWalking(Collections.singletonList(expression), null);
    }

    // Register each declared argument in the row resolver and remember its name and type.
    RowResolver rowResolver = new RowResolver();
    ArrayList<String> macroColNames = new ArrayList<String>(params.size());
    ArrayList<TypeInfo> macroColTypes = new ArrayList<TypeInfo>(params.size());
    for (FieldSchema param : params) {
        TypeInfo paramType = TypeInfoUtils.getTypeInfoFromTypeString(param.getType());
        String paramName = param.getName();
        rowResolver.put("", paramName, new ColumnInfo(paramName, paramType, "", false));
        macroColNames.add(paramName);
        macroColTypes.add(paramType);
    }

    // The declared arguments and the columns used in the body must be the same set.
    Set<String> declaredColumns = new LinkedHashSet<String>(macroColNames);
    if (!declaredColumns.equals(referencedColumns)) {
        throw new SemanticException("Expected columns " + declaredColumns + " but found " + referencedColumns);
    }
    // The set drops duplicates, so a size mismatch means a repeated argument name.
    if (declaredColumns.size() != macroColNames.size()) {
        throw new SemanticException("At least one parameter name was used more than once " + macroColNames);
    }

    SemanticAnalyzer analyzer;
    if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_ENABLED)) {
        analyzer = new CalcitePlanner(queryState);
    } else {
        analyzer = new SemanticAnalyzer(queryState);
    }

    // With no declared arguments the body sits at child 1, otherwise at child 2.
    int bodyChildIndex = hasNoParams ? 1 : 2;
    ExprNodeDesc body = analyzer.genExprNodeDesc((ASTNode) ast.getChild(bodyChildIndex), rowResolver);

    CreateMacroDesc macroDesc = new CreateMacroDesc(macroName, macroColNames, macroColTypes, body);
    rootTasks.add(TaskFactory.get(new FunctionWork(macroDesc), conf));
    addEntities();
}
Use of org.apache.hadoop.hive.ql.lib.PreOrderWalker in the Apache Hive project.
From the class BucketingSortingInferenceOptimizer, method inferBucketingSorting.
/**
 * For every map reduce task that has a reducer, infers whether the final output of
 * that reducer is bucketed and/or sorted, and copies the inferred bucketed and sorted
 * columns (keyed by directory) into the task's map work.
 *
 * @param mapRedTasks the map reduce tasks to inspect
 * @throws SemanticException if raised while walking a reducer's operator tree
 */
private void inferBucketingSorting(List<ExecDriver> mapRedTasks) throws SemanticException {
    for (ExecDriver task : mapRedTasks) {
        // Skip tasks that are not marked as the final map reduce job, and tasks
        // that have no reduce work at all.
        if (!task.getWork().isFinalMapRed() || task.getWork().getReduceWork() == null) {
            continue;
        }

        Operator<? extends OperatorDesc> reducer = task.getWork().getReduceWork().getReducer();

        // A positive sampling type means the map work uses sampling, so the output
        // is treated as not bucketed.
        boolean samplingInUse = task.getWork().getMapWork().getSamplingType() > 0;
        BucketingSortingCtx inferenceCtx = new BucketingSortingCtx(samplingInUse);

        Map<Rule, NodeProcessor> rules = buildInferenceRules();

        // The dispatcher fires the processor of the closest matching rule, falling back
        // to the default processor, and passes the inference context along.
        Dispatcher dispatcher =
            new DefaultRuleDispatcher(BucketingSortingOpProcFactory.getDefaultProc(), rules, inferenceCtx);
        GraphWalker walker = new PreOrderWalker(dispatcher);

        // The walk starts at the reducer, so the reducer is the first operator seen.
        ArrayList<Node> startNodes = new ArrayList<Node>();
        startNodes.add(reducer);
        walker.startWalking(startNodes, null);

        // Publish what the walk inferred into the task's map work.
        task.getWork().getMapWork().getBucketedColsByDirectory()
            .putAll(inferenceCtx.getBucketedColsByDirectory());
        task.getWork().getMapWork().getSortedColsByDirectory()
            .putAll(inferenceCtx.getSortedColsByDirectory());
    }
}

/**
 * Builds the ordered rule table used when walking a reducer's operator tree.
 *
 * <p>RuleRegExp rules match operators anywhere in the tree; RuleExactMatch rules
 * specify exactly what the tree should look like. Because the walk begins at the
 * reducer, an exact match on a single operator name only matches the reducer itself.
 *
 * @return rules in insertion order, each mapped to its processor
 */
private Map<Rule, NodeProcessor> buildInferenceRules() {
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();

    rules.put(new RuleRegExp("R1", SelectOperator.getOperatorName() + "%"),
        BucketingSortingOpProcFactory.getSelProc());

    // Only GroupByOperators that are reducers, not map-side group bys or the
    // operators specific to the multi group by optimization.
    rules.put(new RuleExactMatch("R2", new String[] { GroupByOperator.getOperatorName() }),
        BucketingSortingOpProcFactory.getGroupByProc());

    // Only JoinOperators that are reducers, not map joins, SMB map joins, etc.
    rules.put(new RuleExactMatch("R3", new String[] { JoinOperator.getOperatorName() }),
        BucketingSortingOpProcFactory.getJoinProc());

    rules.put(new RuleRegExp("R5", FileSinkOperator.getOperatorName() + "%"),
        BucketingSortingOpProcFactory.getFileSinkProc());
    rules.put(new RuleRegExp("R7", FilterOperator.getOperatorName() + "%"),
        BucketingSortingOpProcFactory.getFilterProc());
    rules.put(new RuleRegExp("R8", LimitOperator.getOperatorName() + "%"),
        BucketingSortingOpProcFactory.getLimitProc());
    rules.put(new RuleRegExp("R9", LateralViewForwardOperator.getOperatorName() + "%"),
        BucketingSortingOpProcFactory.getLateralViewForwardProc());
    rules.put(new RuleRegExp("R10", LateralViewJoinOperator.getOperatorName() + "%"),
        BucketingSortingOpProcFactory.getLateralViewJoinProc());

    // Only ForwardOperators preceded by some other operator in the tree, so never a
    // reducer (and hence never a ForwardOperator added by the multi group by optimization).
    rules.put(new RuleRegExp("R11", ".+" + ForwardOperator.getOperatorName() + "%"),
        BucketingSortingOpProcFactory.getForwardProc());

    // Only ForwardOperators that are reducers and are followed by a GroupByOperator
    // (specific to the multi group by optimization).
    rules.put(
        new RuleExactMatch("R12",
            new String[] { ForwardOperator.getOperatorName(), GroupByOperator.getOperatorName() }),
        BucketingSortingOpProcFactory.getMultiGroupByProc());

    return rules;
}
Aggregations