Search in sources :

Example 46 with DefaultRuleDispatcher

use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.

the class TestAccumuloRangeGenerator method testPartialRangeConjunction.

@Test
public void testPartialRangeConjunction() throws Exception {
    // rowId >= 'f'
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    assertNotNull(node);
    // anythingElse <= 'foo'
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "anythingElse", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "foo");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children2);
    assertNotNull(node2);
    // And UDF
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
    // Should generate [f,+inf)
    List<Range> expectedRanges = Arrays.asList(new Range(new Key("f"), true, null, false));
    AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid");
    Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(both);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    try {
        ogw.startWalking(topNodes, nodeOutput);
    } catch (SemanticException ex) {
        throw new RuntimeException(ex);
    }
    Object result = nodeOutput.get(both);
    Assert.assertNotNull(result);
    Assert.assertTrue("Result from graph walk was not a List", result instanceof List);
    @SuppressWarnings("unchecked") List<Range> actualRanges = (List<Range>) result;
    Assert.assertEquals(expectedRanges, actualRanges);
}
Also used : GenericUDFOPEqualOrLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan) HashMap(java.util.HashMap) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Range(org.apache.accumulo.core.data.Range) GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan) Key(org.apache.accumulo.core.data.Key) GenericUDFOPAnd(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd) Test(org.junit.Test)

Example 47 with DefaultRuleDispatcher

use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.

the class TestAccumuloRangeGenerator method testCastExpression.

@Test
public void testCastExpression() throws Exception {
    // 40 and 50
    ExprNodeDesc fourty = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 40), fifty = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 50);
    // +
    GenericUDFOPPlus plus = new GenericUDFOPPlus();
    // 40 + 50
    ExprNodeGenericFuncDesc addition = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, plus, Arrays.asList(fourty, fifty));
    // cast(.... as string)
    UDFToString stringCast = new UDFToString();
    GenericUDFBridge stringCastBridge = new GenericUDFBridge("cast", false, stringCast.getClass().getName());
    // cast (40 + 50 as string)
    ExprNodeGenericFuncDesc cast = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, stringCastBridge, "cast", Collections.<ExprNodeDesc>singletonList(addition));
    ExprNodeDesc key = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "key", null, false);
    ExprNodeGenericFuncDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), Arrays.asList(key, cast));
    AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "key");
    Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(node);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    try {
        ogw.startWalking(topNodes, nodeOutput);
    } catch (SemanticException ex) {
        throw new RuntimeException(ex);
    }
    // Don't fail -- would be better to actually compute a range of [90,+inf)
    Object result = nodeOutput.get(node);
    Assert.assertNull(result);
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HashMap(java.util.HashMap) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan) GenericUDFOPPlus(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) Test(org.junit.Test)

Example 48 with DefaultRuleDispatcher

use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.

the class TestAccumuloRangeGenerator method testRangeDisjunction.

@Test
public void testRangeDisjunction() throws Exception {
    // rowId >= 'f'
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "f");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrGreaterThan(), children);
    assertNotNull(node);
    // rowId <= 'm'
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "m");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children2);
    assertNotNull(node2);
    // Or UDF
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPOr(), bothFilters);
    // Should generate (-inf,+inf)
    List<Range> expectedRanges = Arrays.asList(new Range());
    AccumuloRangeGenerator rangeGenerator = new AccumuloRangeGenerator(handler, rowIdMapping, "rid");
    Dispatcher disp = new DefaultRuleDispatcher(rangeGenerator, Collections.<Rule, NodeProcessor>emptyMap(), null);
    GraphWalker ogw = new DefaultGraphWalker(disp);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.add(both);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    try {
        ogw.startWalking(topNodes, nodeOutput);
    } catch (SemanticException ex) {
        throw new RuntimeException(ex);
    }
    Object result = nodeOutput.get(both);
    Assert.assertNotNull(result);
    Assert.assertTrue("Result from graph walk was not a List", result instanceof List);
    @SuppressWarnings("unchecked") List<Range> actualRanges = (List<Range>) result;
    Assert.assertEquals(expectedRanges, actualRanges);
}
Also used : GenericUDFOPEqualOrLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan) HashMap(java.util.HashMap) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ArrayList(java.util.ArrayList) List(java.util.List) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Range(org.apache.accumulo.core.data.Range) GenericUDFOPEqualOrGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) Test(org.junit.Test)

Example 49 with DefaultRuleDispatcher

use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.

the class IndexWhereTaskDispatcher method dispatch.

@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
    Task<? extends Serializable> task = (Task<? extends Serializable>) nd;
    ParseContext pctx = physicalContext.getParseContext();
    // create the regex's so the walker can recognize our WHERE queries
    Map<Rule, NodeProcessor> operatorRules = createOperatorRules(pctx);
    // check for no indexes on any table
    if (operatorRules == null) {
        return null;
    }
    // create context so the walker can carry the current task with it.
    IndexWhereProcCtx indexWhereOptimizeCtx = new IndexWhereProcCtx(task, pctx);
    // create the dispatcher, which fires the processor according to the rule that
    // best matches
    Dispatcher dispatcher = new DefaultRuleDispatcher(getDefaultProcessor(), operatorRules, indexWhereOptimizeCtx);
    // walk the mapper operator(not task) tree for each specific task
    GraphWalker ogw = new DefaultGraphWalker(dispatcher);
    ArrayList<Node> topNodes = new ArrayList<Node>();
    if (task.getWork() instanceof MapredWork) {
        topNodes.addAll(((MapredWork) task.getWork()).getMapWork().getAliasToWork().values());
    } else {
        return null;
    }
    ogw.startWalking(topNodes, null);
    return null;
}
Also used : Task(org.apache.hadoop.hive.ql.exec.Task) Serializable(java.io.Serializable) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) Node(org.apache.hadoop.hive.ql.lib.Node) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Rule(org.apache.hadoop.hive.ql.lib.Rule) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Example 50 with DefaultRuleDispatcher

use of org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher in project hive by apache.

the class AnnotateWithStatistics method transform.

@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    AnnotateStatsProcCtx aspCtx = new AnnotateStatsProcCtx(pctx);
    // create a walker which walks the tree in a BFS manner while maintaining the
    // operator stack. The dispatcher generates the plan from the operator tree
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("TS", TableScanOperator.getOperatorName() + "%"), StatsRulesProcFactory.getTableScanRule());
    opRules.put(new RuleRegExp("SEL", SelectOperator.getOperatorName() + "%"), StatsRulesProcFactory.getSelectRule());
    opRules.put(new RuleRegExp("FIL", FilterOperator.getOperatorName() + "%"), StatsRulesProcFactory.getFilterRule());
    opRules.put(new RuleRegExp("GBY", GroupByOperator.getOperatorName() + "%"), StatsRulesProcFactory.getGroupByRule());
    opRules.put(new RuleRegExp("JOIN", CommonJoinOperator.getOperatorName() + "%|" + MapJoinOperator.getOperatorName() + "%"), StatsRulesProcFactory.getJoinRule());
    opRules.put(new RuleRegExp("LIM", LimitOperator.getOperatorName() + "%"), StatsRulesProcFactory.getLimitRule());
    opRules.put(new RuleRegExp("RS", ReduceSinkOperator.getOperatorName() + "%"), StatsRulesProcFactory.getReduceSinkRule());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(StatsRulesProcFactory.getDefaultRule(), opRules, aspCtx);
    GraphWalker ogw = new LevelOrderWalker(disp, 0);
    // Create a list of topop nodes
    ArrayList<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pctx.getTopOps().values());
    ogw.startWalking(topNodes, null);
    return pctx;
}
Also used : NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) Rule(org.apache.hadoop.hive.ql.lib.Rule) LevelOrderWalker(org.apache.hadoop.hive.ql.lib.LevelOrderWalker) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Aggregations

DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher)64 Node (org.apache.hadoop.hive.ql.lib.Node)64 GraphWalker (org.apache.hadoop.hive.ql.lib.GraphWalker)63 ArrayList (java.util.ArrayList)62 Dispatcher (org.apache.hadoop.hive.ql.lib.Dispatcher)62 NodeProcessor (org.apache.hadoop.hive.ql.lib.NodeProcessor)55 Rule (org.apache.hadoop.hive.ql.lib.Rule)55 LinkedHashMap (java.util.LinkedHashMap)54 DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)49 RuleRegExp (org.apache.hadoop.hive.ql.lib.RuleRegExp)48 HashMap (java.util.HashMap)16 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)12 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)11 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)11 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)11 TypeRule (org.apache.hadoop.hive.ql.lib.TypeRule)10 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)7 GenericUDFOPEqualOrGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan)7 Test (org.junit.Test)7 List (java.util.List)6