Use of org.apache.hadoop.hive.ql.lib.NodeProcessor in project hive by apache.
The class CountDistinctRewriteProc, method transform.
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  // process group-by pattern
  opRules.put(new RuleRegExp("R1",
      GroupByOperator.getOperatorName() + "%"
          + ReduceSinkOperator.getOperatorName() + "%"
          + GroupByOperator.getOperatorName() + "%"),
      getCountDistinctProc(pctx));
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, null);
  GraphWalker ogw = new DefaultGraphWalker(disp);
  // Create a list of top op nodes
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
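The method above passes getDefaultProc() to the dispatcher, but the snippet does not show it. In this pattern the default processor is typically a no-op NodeProcessor that lets the walk continue past nodes no rule matched; a minimal sketch of such a helper (an assumption for illustration, not necessarily the exact Hive code) is:

  private NodeProcessor getDefaultProc() {
    return new NodeProcessor() {
      @Override
      public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
          Object... nodeOutputs) throws SemanticException {
        // no-op: nothing to rewrite for nodes that no rule matched
        return null;
      }
    };
  }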
Use of org.apache.hadoop.hive.ql.lib.NodeProcessor in project hive by apache.
The class SyntheticJoinPredicate, method transform.
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  boolean enabled = false;
  String queryEngine = pctx.getConf().getVar(ConfVars.HIVE_EXECUTION_ENGINE);
  if (queryEngine.equals("tez")
      && pctx.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING)) {
    enabled = true;
  } else if (queryEngine.equals("spark") && pctx.getConf().isSparkDPPAny()) {
    enabled = true;
  }
  if (!enabled) {
    return pctx;
  }
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1",
      "(" + TableScanOperator.getOperatorName() + "%" + ".*"
          + ReduceSinkOperator.getOperatorName() + "%"
          + JoinOperator.getOperatorName() + "%)"),
      new JoinSynthetic());
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  SyntheticContext context = new SyntheticContext(pctx);
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, context);
  GraphWalker ogw = new PreOrderOnceWalker(disp);
  // Create a list of top op nodes
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
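JoinSynthetic is the NodeProcessor that fires when the rule above matches. Every such processor implements the single process method of org.apache.hadoop.hive.ql.lib.NodeProcessor; a hypothetical skeleton, with the actual predicate-injection logic elided, might look like this:

  private static class JoinSynthetic implements NodeProcessor {
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      // the rule ends in the join operator name, so the matched node is a JoinOperator
      JoinOperator join = (JoinOperator) nd;
      SyntheticContext context = (SyntheticContext) procCtx;
      // ... inspect the parent operators on the stack and add synthetic join predicates ...
      return null;
    }
  }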
Use of org.apache.hadoop.hive.ql.lib.NodeProcessor in project hive by apache.
The class TableAccessAnalyzer, method analyzeTableAccess.
public TableAccessInfo analyzeTableAccess() throws SemanticException {
  // Set up the rules for the graph walker for group-by and join operators
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%"),
      new GroupByProcessor(pGraphContext));
  opRules.put(new RuleRegExp("R2", JoinOperator.getOperatorName() + "%"),
      new JoinProcessor(pGraphContext));
  opRules.put(new RuleRegExp("R3", MapJoinOperator.getOperatorName() + "%"),
      new JoinProcessor(pGraphContext));
  TableAccessCtx tableAccessCtx = new TableAccessCtx();
  Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, tableAccessCtx);
  GraphWalker ogw = new DefaultGraphWalker(disp);
  // Create a list of top op nodes and walk!
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pGraphContext.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return tableAccessCtx.getTableAccessInfo();
}
Use of org.apache.hadoop.hive.ql.lib.NodeProcessor in project hive by apache.
The class IndexWhereTaskDispatcher, method createOperatorRules.
/**
 * Create a set of rules that only match WHERE predicates on columns we have
 * an index on.
 * @return the rule-to-processor map, or null if none of the queried tables has a supported index
 */
private Map<Rule, NodeProcessor> createOperatorRules(ParseContext pctx) throws SemanticException {
  Map<Rule, NodeProcessor> operatorRules = new LinkedHashMap<Rule, NodeProcessor>();
  List<String> supportedIndexes = new ArrayList<String>();
  supportedIndexes.add(CompactIndexHandler.class.getName());
  supportedIndexes.add(BitmapIndexHandler.class.getName());
  // query the metastore to know what columns we have indexed
  Map<TableScanOperator, List<Index>> indexes = new HashMap<TableScanOperator, List<Index>>();
  for (Operator<? extends OperatorDesc> op : pctx.getTopOps().values()) {
    if (op instanceof TableScanOperator) {
      List<Index> tblIndexes = IndexUtils.getIndexes(
          ((TableScanOperator) op).getConf().getTableMetadata(), supportedIndexes);
      if (tblIndexes.size() > 0) {
        indexes.put((TableScanOperator) op, tblIndexes);
      }
    }
  }
  // quit if our tables don't have any indexes
  if (indexes.size() == 0) {
    return null;
  }
  // We set the pushed predicate from the WHERE clause as the filter expr on
  // all table scan operators, so we look for table scan operators (TS%)
  operatorRules.put(new RuleRegExp("RULEWhere", TableScanOperator.getOperatorName() + "%"),
      new IndexWhereProcessor(indexes));
  return operatorRules;
}
Use of org.apache.hadoop.hive.ql.lib.NodeProcessor in project hive by apache.
The class IndexWhereTaskDispatcher, method dispatch.
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
  Task<? extends Serializable> task = (Task<? extends Serializable>) nd;
  ParseContext pctx = physicalContext.getParseContext();
  // create the regexes so the walker can recognize our WHERE queries
  Map<Rule, NodeProcessor> operatorRules = createOperatorRules(pctx);
  // check for no indexes on any table
  if (operatorRules == null) {
    return null;
  }
  // create a context so the walker can carry the current task with it
  IndexWhereProcCtx indexWhereOptimizeCtx = new IndexWhereProcCtx(task, pctx);
  // create the dispatcher, which fires the processor according to the rule that
  // best matches
  Dispatcher dispatcher = new DefaultRuleDispatcher(getDefaultProcessor(), operatorRules, indexWhereOptimizeCtx);
  // walk the mapper operator (not task) tree for each specific task
  GraphWalker ogw = new DefaultGraphWalker(dispatcher);
  ArrayList<Node> topNodes = new ArrayList<Node>();
  if (task.getWork() instanceof MapredWork) {
    topNodes.addAll(((MapredWork) task.getWork()).getMapWork().getAliasToWork().values());
  } else {
    return null;
  }
  ogw.startWalking(topNodes, null);
  return null;
}
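For context, this dispatch method is itself driven by a task-level graph walk over the physical plan. A hedged sketch of how such a dispatcher could be wired up (the constructor argument and the getRootTasks accessor are assumptions for illustration, not the exact Hive resolver code):

  // Hypothetical wiring; assumes a PhysicalContext named physicalContext
  Dispatcher taskDispatcher = new IndexWhereTaskDispatcher(physicalContext);
  GraphWalker taskWalker = new DefaultGraphWalker(taskDispatcher);
  List<Node> rootTasks = new ArrayList<Node>();
  rootTasks.addAll(physicalContext.getRootTasks());
  // each root Task is handed to dispatch(), which then walks that task's operator tree
  taskWalker.startWalking(rootTasks, null);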