Use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.
From the class TestAccumuloRangeGenerator, the method testDateRangeConjunction:
@Test
public void testDateRangeConjunction() throws Exception {
  // rowId >= '2014-01-01'
  ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
  ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, Date.valueOf("2014-01-01"));
  List<ExprNodeDesc> children = Lists.newArrayList();
  children.add(column);
  children.add(constant);
  ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPEqualOrGreaterThan(), children);
  assertNotNull(node);
  // rowId < '2014-07-01'
  ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
  ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.dateTypeInfo, Date.valueOf("2014-07-01"));
  List<ExprNodeDesc> children2 = Lists.newArrayList();
  children2.add(column2);
  children2.add(constant2);
  ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPLessThan(), children2);
  assertNotNull(node2);
  // And UDF
  List<ExprNodeDesc> bothFilters = Lists.newArrayList();
  bothFilters.add(node);
  bothFilters.add(node2);
  ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPAnd(), bothFilters);
  // Should generate [2014-01-01, 2014-07-01)
  List<Range> expectedRanges = Arrays.asList(
      new Range(new Key("2014-01-01"), true, new Key("2014-07-01"), false));
  AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid");
  Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator,
      Collections.<Rule, NodeProcessor>emptyMap(), null);
  GraphWalker ogw = new DefaultGraphWalker(disp);
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.add(both);
  HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
  try {
    ogw.startWalking(topNodes, nodeOutput);
  } catch (SemanticException ex) {
    throw new RuntimeException(ex);
  }
  Object result = nodeOutput.get(both);
  Assert.assertNotNull(result);
  Assert.assertTrue("Result from graph walk was not a List", result instanceof List);
  @SuppressWarnings("unchecked")
  List<Range> actualRanges = (List<Range>) result;
  Assert.assertEquals(expectedRanges, actualRanges);
}
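Because the rule map handed to DefaultRuleDispatcher above is empty, the AccumuloRangeGenerator acts as the default processor and fires for every node the walker visits, with each node's result recorded in nodeOutput. Below is a minimal sketch of that same wiring, assuming `root` is any expression Node (for example the ExprNodeGenericFuncDesc built above) and using a throwaway anonymous processor instead of Hive's real one; the types come from org.apache.hadoop.hive.ql.lib and java.util.

  // Illustrative only: an anonymous NodeProcessor used as the default processor,
  // so it fires for every visited node; DefaultGraphWalker dispatches children
  // before parents and stores each return value in the output map keyed by node.
  NodeProcessor echoProc = new NodeProcessor() {
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      return nd.getName();
    }
  };
  Dispatcher dispatcher = new DefaultRuleDispatcher(echoProc,
      Collections.<Rule, NodeProcessor>emptyMap(), null);
  GraphWalker walker = new DefaultGraphWalker(dispatcher);
  HashMap<Node, Object> output = new HashMap<Node, Object>();
  walker.startWalking(Collections.<Node>singletonList(root), output);
  // output.get(root) now holds whatever the processor returned for the root node.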
Use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.
From the class TestAccumuloRangeGenerator, the method testRangeConjunction:
@Test
public void testRangeConjunction() throws Exception {
  // rowId >= 'f'
  ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
  ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f");
  List<ExprNodeDesc> children = Lists.newArrayList();
  children.add(column);
  children.add(constant);
  ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPEqualOrGreaterThan(), children);
  assertNotNull(node);
  // rowId <= 'm'
  ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
  ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "m");
  List<ExprNodeDesc> children2 = Lists.newArrayList();
  children2.add(column2);
  children2.add(constant2);
  ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPEqualOrLessThan(), children2);
  assertNotNull(node2);
  // And UDF
  List<ExprNodeDesc> bothFilters = Lists.newArrayList();
  bothFilters.add(node);
  bothFilters.add(node2);
  ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPAnd(), bothFilters);
  // Should generate [f, m]
  List<Range> expectedRanges = Arrays.asList(new Range(new Key("f"), true, new Key("m\0"), false));
  AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(conf, handler, rowIdMapping, "rid");
  Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator,
      Collections.<Rule, NodeProcessor>emptyMap(), null);
  GraphWalker ogw = new DefaultGraphWalker(disp);
  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.add(both);
  HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
  try {
    ogw.startWalking(topNodes, nodeOutput);
  } catch (SemanticException ex) {
    throw new RuntimeException(ex);
  }
  Object result = nodeOutput.get(both);
  Assert.assertNotNull(result);
  Assert.assertTrue("Result from graph walk was not a List", result instanceof List);
  @SuppressWarnings("unchecked")
  List<Range> actualRanges = (List<Range>) result;
  Assert.assertEquals(expectedRanges, actualRanges);
}
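The expected range above encodes the inclusive upper bound 'm' as an exclusive bound at "m\0": since "m\0" is the smallest row that sorts after every key whose row is exactly "m", the two formulations cover the same keys. A small sketch of the effect, using the plain Accumulo Key/Range API already referenced by the test (not part of the Hive test itself):

  // The end key "m\0" is exclusive, yet every key in row "m" is still covered,
  // because row "m" sorts strictly before row "m\0".
  Range inclusiveM = new Range(new Key("f"), true, new Key("m\0"), false);
  boolean coversRowM = inclusiveM.contains(new Key("m"));     // true
  boolean coversEndKey = inclusiveM.contains(new Key("m\0")); // false, the end key is exclusive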
Use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.
From the class TezCompiler, the method removeSemijoinOptimizationFromSMBJoins:
private static void removeSemijoinOptimizationFromSMBJoins(OptimizeTezProcContext procCtx)
    throws SemanticException {
  if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)
      || procCtx.parseContext.getRsToSemiJoinBranchInfo().size() == 0) {
    return;
  }
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1",
      TableScanOperator.getOperatorName() + "%" + ".*"
          + TezDummyStoreOperator.getOperatorName() + "%"
          + CommonMergeJoinOperator.getOperatorName() + "%"),
      new SMBJoinOpProc());
  SMBJoinOpProcContext ctx = new SMBJoinOpProcContext();
  // The dispatcher finds an SMB join and, if there is a semijoin optimization before it, removes it.
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(procCtx.parseContext.getTopOps().values());
  GraphWalker ogw = new PreOrderOnceWalker(disp);
  ogw.startWalking(topNodes, null);
  List<TableScanOperator> tsOps = new ArrayList<>();
  // Iterate over the map and remove semijoin optimizations if needed.
  for (CommonMergeJoinOperator joinOp : ctx.JoinOpToTsOpMap.keySet()) {
    // Get one top-level TS op directly from the stack
    tsOps.add(ctx.JoinOpToTsOpMap.get(joinOp));
    // Get the other one by examining the join op
    List<Operator<?>> parents = joinOp.getParentOperators();
    for (Operator<?> parent : parents) {
      if (parent instanceof TezDummyStoreOperator) {
        // already accounted for
        continue;
      }
      while (parent != null) {
        if (parent instanceof TableScanOperator) {
          tsOps.add((TableScanOperator) parent);
          break;
        }
        parent = parent.getParentOperators().get(0);
      }
    }
  }
  // Now that the relevant TableScanOperators are known, find out whether any of them
  // has a semijoin filter; if so, remove it.
  ParseContext pctx = procCtx.parseContext;
  Set<ReduceSinkOperator> rsSet = new HashSet<>(pctx.getRsToSemiJoinBranchInfo().keySet());
  for (TableScanOperator ts : tsOps) {
    for (ReduceSinkOperator rs : rsSet) {
      SemiJoinBranchInfo sjInfo = pctx.getRsToSemiJoinBranchInfo().get(rs);
      if (sjInfo != null && ts == sjInfo.getTsOp()) {
        // match!
        if (sjInfo.getIsHint()) {
          throw new SemanticException("Removing hinted semijoin as it is with SMB join " + rs + " : " + ts);
        }
        if (LOG.isDebugEnabled()) {
          LOG.debug("Semijoin optimization found going to SMB join. Removing semijoin "
              + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
        }
        GenTezUtils.removeBranch(rs);
        GenTezUtils.removeSemiJoinOperator(pctx, rs, ts);
      }
    }
  }
}
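The SMBJoinOpProc registered above is not shown in this snippet. The sketch below is a hypothetical stand-in, only to illustrate the shape such a processor would plausibly have: when the TS...DUMMY_STORE...MERGEJOIN rule matches, the current node is the CommonMergeJoinOperator and the TableScanOperator that roots the walked path sits at the bottom of the operator stack, which is one plausible way JoinOpToTsOpMap gets populated.

  // Hypothetical sketch only, not the real SMBJoinOpProc: remember the merge join
  // that matched the rule together with the TableScanOperator at the root of the
  // walked path (the bottom of the operator stack).
  NodeProcessor smbJoinOpSketch = new NodeProcessor() {
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      SMBJoinOpProcContext ctx = (SMBJoinOpProcContext) procCtx;
      CommonMergeJoinOperator joinOp = (CommonMergeJoinOperator) nd;
      TableScanOperator rootTs = (TableScanOperator) stack.firstElement();
      ctx.JoinOpToTsOpMap.put(joinOp, rootTs);
      return null;
    }
  };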
Use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.
From the class TezCompiler, the method runDynamicPartitionPruning:
private void runDynamicPartitionPruning(OptimizeTezProcContext procCtx, Set<ReadEntity> inputs,
    Set<WriteEntity> outputs) throws SemanticException {
  if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING)) {
    return;
  }
  // Sequence of TableScan operators to be walked
  Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
  deque.addAll(procCtx.parseContext.getTopOps().values());
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("Dynamic Partition Pruning", FilterOperator.getOperatorName() + "%"),
      new DynamicPartitionPruningOptimization());
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(procCtx.parseContext.getTopOps().values());
  GraphWalker ogw = new ForwardWalker(disp);
  ogw.startWalking(topNodes, null);
}
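The single rule registered here is keyed on FilterOperator's operator name ("FIL") plus "%", so the processor fires on FilterOperator nodes reached by the forward walk and receives the shared OptimizeTezProcContext that was handed to the dispatcher. A hypothetical processor with the same shape, only to show what a processor registered under such a rule sees (it is not the real DynamicPartitionPruningOptimization):

  // Hypothetical illustration: the matched FilterOperator arrives as the current
  // node and the dispatcher passes along the shared context object, so findings
  // can be accumulated across fired rules.
  NodeProcessor filterProbe = new NodeProcessor() {
    @Override
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
        Object... nodeOutputs) throws SemanticException {
      FilterOperator filter = (FilterOperator) nd;
      OptimizeTezProcContext context = (OptimizeTezProcContext) procCtx;
      // e.g. inspect filter.getConf().getPredicate() and stash findings on the context
      return null;
    }
  };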
Use of org.apache.hadoop.hive.ql.lib.Dispatcher in project hive by apache.
From the class TezCompiler, the method generateTaskTree:
@Override
protected void generateTaskTree(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx,
    List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
  PerfLogger perfLogger = SessionState.getPerfLogger();
  perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
  ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
  GenTezUtils utils = new GenTezUtils();
  GenTezWork genTezWork = new GenTezWork(utils);
  GenTezProcContext procCtx = new GenTezProcContext(conf, tempParseContext, mvTask, rootTasks, inputs, outputs);
  // Create a walker which walks the tree in a DFS manner while maintaining the operator stack.
  // The dispatcher generates the plan from the operator tree.
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("Split Work - ReduceSink",
      ReduceSinkOperator.getOperatorName() + "%"), genTezWork);
  opRules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin",
      MapJoinOperator.getOperatorName() + "%"), new ReduceSinkMapJoinProc());
  opRules.put(new RuleRegExp("Recognize a Sorted Merge Join operator to setup the right edge and"
      + " stop traversing the DummyStore-MapJoin",
      CommonMergeJoinOperator.getOperatorName() + "%"), new MergeJoinProc());
  opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink",
      FileSinkOperator.getOperatorName() + "%"),
      new CompositeProcessor(new FileSinkProcessor(), genTezWork));
  opRules.put(new RuleRegExp("Split work - DummyStore",
      DummyStoreOperator.getOperatorName() + "%"), genTezWork);
  opRules.put(new RuleRegExp("Handle Potential Analyze Command",
      TableScanOperator.getOperatorName() + "%"), new ProcessAnalyzeTable(utils));
  opRules.put(new RuleRegExp("Remember union",
      UnionOperator.getOperatorName() + "%"), new UnionProcessor());
  opRules.put(new RuleRegExp("AppMasterEventOperator",
      AppMasterEventOperator.getOperatorName() + "%"), new AppMasterEventProcessor());
  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along.
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pCtx.getTopOps().values());
  GraphWalker ogw = new GenTezWorkWalker(disp, procCtx);
  ogw.startWalking(topNodes, null);
  // We need to specify the reserved memory for each work that contains a Map Join.
  for (List<BaseWork> baseWorkList : procCtx.mapJoinWorkMap.values()) {
    for (BaseWork w : baseWorkList) {
      // Work should be the smallest unit for memory allocation.
      w.setReservedMemoryMB(
          (int) (conf.getLongVar(ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD) / (1024 * 1024)));
    }
  }
  // We still need to clone some operator plans and remove union operators.
  int indexForTezUnion = 0;
  for (BaseWork w : procCtx.workWithUnionOperators) {
    GenTezUtils.removeUnionOperators(procCtx, w, indexForTezUnion++);
  }
  // Then we make sure the file sink operators are set up right.
  for (FileSinkOperator fileSink : procCtx.fileSinkSet) {
    GenTezUtils.processFileSink(procCtx, fileSink);
  }
  // Connect any edges required for min/max pushdown.
  if (pCtx.getRsToRuntimeValuesInfoMap().size() > 0) {
    for (ReduceSinkOperator rs : pCtx.getRsToRuntimeValuesInfoMap().keySet()) {
      // Process min/max
      GenTezUtils.processDynamicSemiJoinPushDownOperator(procCtx,
          pCtx.getRsToRuntimeValuesInfoMap().get(rs), rs);
    }
  }
  // And finally we hook up any events that need to be sent to the Tez AM.
  LOG.debug("There are " + procCtx.eventOperatorSet.size() + " app master events.");
  for (AppMasterEventOperator event : procCtx.eventOperatorSet) {
    LOG.debug("Handling AppMasterEventOperator: " + event);
    GenTezUtils.processAppMasterEvent(procCtx, event);
  }
  perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "generateTaskTree");
}
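The reserved-memory line above converts the hive.auto.convert.join.noconditionaltask.size threshold (HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD, a value in bytes) into whole megabytes with integer division. A worked example, assuming the threshold is left at a typical value of 10,000,000 bytes:

  // Assumed threshold value for illustration; the real value comes from
  // conf.getLongVar(ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD).
  long thresholdBytes = 10_000_000L;
  int reservedMB = (int) (thresholdBytes / (1024 * 1024)); // 10,000,000 / 1,048,576 -> 9 MB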