Examples with ConvertJoinMapJoin - org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin

Example 1 with ConvertJoinMapJoin

use of org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin in project hive by apache.

the class TestOperators method testNoConditionalTaskSizeForLlap.

@Test
public void testNoConditionalTaskSizeForLlap() {
    ConvertJoinMapJoin convertJoinMapJoin = new ConvertJoinMapJoin();
    long defaultNoConditionalTaskSize = 1024L * 1024L * 1024L;
    HiveConf hiveConf = new HiveConf();
    LlapClusterStateForCompile llapInfo = null;
    if ("llap".equalsIgnoreCase(hiveConf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
        llapInfo = LlapClusterStateForCompile.getClusterInfo(hiveConf);
        llapInfo.initClusterInfo();
    }
    // execution mode not set, null is returned
    assertEquals(defaultNoConditionalTaskSize, convertJoinMapJoin.getMemoryMonitorInfo(defaultNoConditionalTaskSize, hiveConf, llapInfo).getAdjustedNoConditionalTaskSize());
    hiveConf.set(HiveConf.ConfVars.HIVE_EXECUTION_MODE.varname, "llap");
    if ("llap".equalsIgnoreCase(hiveConf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_MODE))) {
        llapInfo = LlapClusterStateForCompile.getClusterInfo(hiveConf);
        llapInfo.initClusterInfo();
    }
    // default executors is 4, max slots is 3. so 3 * 20% of noconditional task size will be oversubscribed
    hiveConf.set(HiveConf.ConfVars.LLAP_MAPJOIN_MEMORY_OVERSUBSCRIBE_FACTOR.varname, "0.2");
    double fraction = hiveConf.getFloatVar(HiveConf.ConfVars.LLAP_MAPJOIN_MEMORY_OVERSUBSCRIBE_FACTOR);
    int maxSlots = 3;
    long expectedSize = (long) (defaultNoConditionalTaskSize + (defaultNoConditionalTaskSize * fraction * maxSlots));
    assertEquals(expectedSize, convertJoinMapJoin.getMemoryMonitorInfo(defaultNoConditionalTaskSize, hiveConf, llapInfo).getAdjustedNoConditionalTaskSize());
    // num executors is less than max executors per query (which is not expected case), default executors will be
    // chosen. 4 * 20% of noconditional task size will be oversubscribed
    int chosenSlots = hiveConf.getIntVar(HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS);
    hiveConf.set(HiveConf.ConfVars.LLAP_MEMORY_OVERSUBSCRIPTION_MAX_EXECUTORS_PER_QUERY.varname, "5");
    expectedSize = (long) (defaultNoConditionalTaskSize + (defaultNoConditionalTaskSize * fraction * chosenSlots));
    assertEquals(expectedSize, convertJoinMapJoin.getMemoryMonitorInfo(defaultNoConditionalTaskSize, hiveConf, llapInfo).getAdjustedNoConditionalTaskSize());
    // disable memory checking
    hiveConf.set(HiveConf.ConfVars.LLAP_MAPJOIN_MEMORY_MONITOR_CHECK_INTERVAL.varname, "0");
    assertFalse(convertJoinMapJoin.getMemoryMonitorInfo(defaultNoConditionalTaskSize, hiveConf, llapInfo).doMemoryMonitoring());
    // invalid inflation factor
    hiveConf.set(HiveConf.ConfVars.LLAP_MAPJOIN_MEMORY_MONITOR_CHECK_INTERVAL.varname, "10000");
    hiveConf.set(HiveConf.ConfVars.HIVE_HASH_TABLE_INFLATION_FACTOR.varname, "0.0f");
    assertFalse(convertJoinMapJoin.getMemoryMonitorInfo(defaultNoConditionalTaskSize, hiveConf, llapInfo).doMemoryMonitoring());
}

Also used : ConvertJoinMapJoin(org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin) LlapClusterStateForCompile(org.apache.hadoop.hive.ql.optimizer.physical.LlapClusterStateForCompile) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Test(org.junit.Test)

Example 2 with ConvertJoinMapJoin

use of org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin in project hive by apache.

the class TezCompiler method runStatsDependentOptimizations.

private void runStatsDependentOptimizations(OptimizeTezProcContext procCtx, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
    // Sequence of TableScan operators to be walked
    Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
    deque.addAll(procCtx.parseContext.getTopOps().values());
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("Set parallelism - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), new SetReducerParallelism());
    opRules.put(new RuleRegExp("Convert Join to Map-join", JoinOperator.getOperatorName() + "%"), new ConvertJoinMapJoin());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(procCtx.parseContext.getTopOps().values());
    GraphWalker ogw = new ForwardWalker(disp);
    ogw.startWalking(topNodes, null);
}

Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) ForwardWalker(org.apache.hadoop.hive.ql.lib.ForwardWalker) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) Node(org.apache.hadoop.hive.ql.lib.Node) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) ArrayList(java.util.ArrayList) SetReducerParallelism(org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedList(java.util.LinkedList) LinkedHashMap(java.util.LinkedHashMap) ConvertJoinMapJoin(org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin) Rule(org.apache.hadoop.hive.ql.lib.Rule) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker)

Aggregations

ConvertJoinMapJoin (org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin)2 ArrayList (java.util.ArrayList)1 LinkedHashMap (java.util.LinkedHashMap)1 LinkedList (java.util.LinkedList)1 HiveConf (org.apache.hadoop.hive.conf.HiveConf)1 AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator)1 CommonMergeJoinOperator (org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator)1 DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator)1 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)1 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)1 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)1 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)1 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)1 Operator (org.apache.hadoop.hive.ql.exec.Operator)1 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)1 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)1 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)1 TezDummyStoreOperator (org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator)1 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)1 DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher)1