Search in sources :

Example 6 with BaseWork

use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

the class TezTask method getMapWork.

/**
 * Collects the map works of this task's Tez plan that have no parent other than union works.
 *
 * <p>A {@link MapWork} with no parents at all qualifies trivially; a single parent that is
 * not a {@link UnionWork} disqualifies it.
 *
 * @return the qualifying map works, in the iteration order of
 *         {@code getAllWorkUnsorted()}
 */
@Override
public Collection<MapWork> getMapWork() {
    List<MapWork> rootMapWorks = new LinkedList<MapWork>();
    TezWork tezWork = getWork();
    for (BaseWork unit : tezWork.getAllWorkUnsorted()) {
        if (!(unit instanceof MapWork)) {
            continue;
        }
        // Scan the parents and stop at the first one that is not a union work;
        // one such parent is enough to reject this map work.
        boolean onlyUnionParents = true;
        for (BaseWork upstream : tezWork.getParents(unit)) {
            if (!(upstream instanceof UnionWork)) {
                onlyUnionParents = false;
                break;
            }
        }
        if (onlyUnionParents) {
            rootMapWorks.add((MapWork) unit);
        }
    }
    return rootMapWorks;
}
Also used : MapWork(org.apache.hadoop.hive.ql.plan.MapWork) UnionWork(org.apache.hadoop.hive.ql.plan.UnionWork) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) LinkedList(java.util.LinkedList) TezWork(org.apache.hadoop.hive.ql.plan.TezWork)

Example 7 with BaseWork

use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

the class ReduceRecordProcessor method close.

/**
 * Tears down this processor: releases the cache entries it holds, closes the record
 * sources (when aborted), the reducer, the reducers of any merged works and the dummy
 * operators, then reports statistics.
 *
 * <p>Regardless of how the operator shutdown ends, the work map for {@code jconf} is
 * cleared and {@code MapredContext} is closed.
 *
 * @throws RuntimeException if closing the operators fails while the processor is not
 *         in the aborted state; the original exception is attached as the cause
 */
@Override
void close() {
    // Release every entry this processor registered in the (optional) caches.
    if (cache != null && cacheKeys != null) {
        for (String key : cacheKeys) {
            cache.release(key);
        }
    }
    if (dynamicValueCache != null && dynamicValueCacheKeys != null) {
        for (String k : dynamicValueCacheKeys) {
            dynamicValueCache.release(k);
        }
    }
    try {
        // When aborted, close the record sources one by one. The first source whose
        // close() returns false clears the aborted flag and stops the loop, so the
        // remaining sources are not closed here.
        if (isAborted()) {
            for (ReduceRecordSource rs : sources) {
                if (!rs.close()) {
                    // Preserving the old logic. Hmm...
                    setAborted(false);
                    break;
                }
            }
        }
        // Re-read the flag: it may have been reset by the loop above.
        boolean abort = isAborted();
        reducer.close(abort);
        // Close the reducer of each merged work with the same abort flag.
        // NOTE(review): the cast assumes every entry of mergeWorkList is a ReduceWork - confirm.
        if (mergeWorkList != null) {
            for (BaseWork redWork : mergeWorkList) {
                ((ReduceWork) redWork).getReducer().close(abort);
            }
        }
        // Need to close the dummyOps as well. The operator pipeline
        // is not considered "closed/done" unless all operators are
        // done. For broadcast joins that includes the dummy parents.
        List<HashTableDummyOperator> dummyOps = reduceWork.getDummyOps();
        if (dummyOps != null) {
            for (Operator<?> dummyOp : dummyOps) {
                dummyOp.close(abort);
            }
        }
        // Hand a stats reporter to the reducer via preorderMap
        // (presumably applied to each operator in the tree - verify against Operator).
        ReportStats rps = new ReportStats(reporter, jconf);
        reducer.preorderMap(rps);
    } catch (Exception e) {
        // A failure while already aborted is dropped silently (not logged, not rethrown);
        // only a failure in the non-aborted state is surfaced to the caller.
        if (!isAborted()) {
            // signal new failure to map-reduce
            l4j.error("Hit error while closing operators - failing tree");
            throw new RuntimeException("Hive Runtime Error while closing operators: " + e.getMessage(), e);
        }
    } finally {
        // Always runs, including when the RuntimeException above propagates.
        Utilities.clearWorkMap(jconf);
        MapredContext.close();
    }
}
Also used : ReportStats(org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats) HashTableDummyOperator(org.apache.hadoop.hive.ql.exec.HashTableDummyOperator) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork)

Example 8 with BaseWork

use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

the class TestGenTezWork method testCreateReduce.

/**
 * Verifies that processing a reduce sink followed by a file sink yields two works,
 * the second being a named {@link ReduceWork} whose parent is the first work and whose
 * reducer is the file sink operator with its parent links removed.
 *
 * <p>Assertions use JUnit's (expected, actual) argument order so that failure messages
 * report the values the right way round.
 */
@Test
public void testCreateReduce() throws SemanticException {
    // create map
    proc.process(rs, null, ctx, (Object[]) null);
    // create reduce
    proc.process(fs, null, ctx, (Object[]) null);
    TezWork work = ctx.currentTask.getWork();
    assertEquals(2, work.getAllWork().size());
    BaseWork w = work.getAllWork().get(1);
    assertTrue(w instanceof ReduceWork);
    // the first work must be registered as a parent of the reduce work
    assertTrue(work.getParents(w).contains(work.getAllWork().get(0)));
    ReduceWork rw = (ReduceWork) w;
    // need to make sure names are set for tez to connect things right
    assertNotNull(w.getName());
    // reduce work should start with our fs op
    assertSame(fs, rw.getReducer());
    // should have severed the ties
    assertEquals(0, fs.getParentOperators().size());
}
Also used : ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) TezWork(org.apache.hadoop.hive.ql.plan.TezWork) Test(org.junit.Test)

Example 9 with BaseWork

use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

the class TezCompiler method generateTaskTree.

/**
 * Generates the Tez task tree by walking the operator trees of {@code pCtx} with a set of
 * operator-name rules, then post-processes the result in order: reserved memory for
 * map-join works, union operator removal, file sink setup, runtime-value (min/max) edges
 * and app master events.
 *
 * <p>The whole method is bracketed by perf-logger begin/end calls for
 * {@code PerfLogger.TEZ_COMPILER}.
 * NOTE(review): the end call is not in a {@code finally}, so it is skipped when an
 * exception propagates - confirm this is intended.
 *
 * @param rootTasks root tasks, passed to the parse context and the processing context
 * @param pCtx parse context supplying the top operators and the runtime-values map
 * @param mvTask move tasks, passed to the processing context
 * @param inputs read entities, passed to the processing context
 * @param outputs write entities, passed to the processing context
 * @throws SemanticException declared by this method; presumably raised by the walk or the
 *         post-processing helpers - verify against their signatures
 */
@Override
protected void generateTaskTree(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx, List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
    ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
    GenTezUtils utils = new GenTezUtils();
    // A single GenTezWork instance is shared by the ReduceSink, FileSink and DummyStore rules.
    GenTezWork genTezWork = new GenTezWork(utils);
    GenTezProcContext procCtx = new GenTezProcContext(conf, tempParseContext, mvTask, rootTasks, inputs, outputs);
    // create a walker which walks the tree in a DFS manner while maintaining
    // the operator stack.
    // The dispatcher generates the plan from the operator tree
    // Rules are kept in a LinkedHashMap, i.e. in registration order.
    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
    opRules.put(new RuleRegExp("Split Work - ReduceSink", ReduceSinkOperator.getOperatorName() + "%"), genTezWork);
    opRules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin", MapJoinOperator.getOperatorName() + "%"), new ReduceSinkMapJoinProc());
    opRules.put(new RuleRegExp("Recognize a Sorted Merge Join operator to setup the right edge and" + " stop traversing the DummyStore-MapJoin", CommonMergeJoinOperator.getOperatorName() + "%"), new MergeJoinProc());
    // File sinks get two processors: the file sink processor first, then the shared GenTezWork.
    opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new FileSinkProcessor(), genTezWork));
    opRules.put(new RuleRegExp("Split work - DummyStore", DummyStoreOperator.getOperatorName() + "%"), genTezWork);
    opRules.put(new RuleRegExp("Handle Potential Analyze Command", TableScanOperator.getOperatorName() + "%"), new ProcessAnalyzeTable(utils));
    opRules.put(new RuleRegExp("Remember union", UnionOperator.getOperatorName() + "%"), new UnionProcessor());
    opRules.put(new RuleRegExp("AppMasterEventOperator", AppMasterEventOperator.getOperatorName() + "%"), new AppMasterEventProcessor());
    // The dispatcher fires the processor corresponding to the closest matching
    // rule and passes the context along
    // No default processor is supplied (first argument is null).
    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
    // The walk starts from every top operator of the original parse context.
    List<Node> topNodes = new ArrayList<Node>();
    topNodes.addAll(pCtx.getTopOps().values());
    GraphWalker ogw = new GenTezWorkWalker(disp, procCtx);
    ogw.startWalking(topNodes, null);
    // we need to specify the reserved memory for each work that contains Map Join
    for (List<BaseWork> baseWorkList : procCtx.mapJoinWorkMap.values()) {
        for (BaseWork w : baseWorkList) {
            // work should be the smallest unit for memory allocation
            // The configured threshold is divided by 1024 * 1024 and truncated to int
            // (presumably a bytes-to-MB conversion - confirm the unit of the setting).
            w.setReservedMemoryMB((int) (conf.getLongVar(ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD) / (1024 * 1024)));
        }
    }
    // we need to clone some operator plans and remove union operators still
    // Each work is passed a distinct, increasing index starting at 0.
    int indexForTezUnion = 0;
    for (BaseWork w : procCtx.workWithUnionOperators) {
        GenTezUtils.removeUnionOperators(procCtx, w, indexForTezUnion++);
    }
    // then we make sure the file sink operators are set up right
    for (FileSinkOperator fileSink : procCtx.fileSinkSet) {
        GenTezUtils.processFileSink(procCtx, fileSink);
    }
    // Connect any edges required for min/max pushdown
    if (pCtx.getRsToRuntimeValuesInfoMap().size() > 0) {
        for (ReduceSinkOperator rs : pCtx.getRsToRuntimeValuesInfoMap().keySet()) {
            // Process min/max
            GenTezUtils.processDynamicSemiJoinPushDownOperator(procCtx, pCtx.getRsToRuntimeValuesInfoMap().get(rs), rs);
        }
    }
    // and finally we hook up any events that need to be sent to the tez AM
    LOG.debug("There are " + procCtx.eventOperatorSet.size() + " app master events.");
    for (AppMasterEventOperator event : procCtx.eventOperatorSet) {
        LOG.debug("Handling AppMasterEventOperator: " + event);
        GenTezUtils.processAppMasterEvent(procCtx, event);
    }
    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "generateTaskTree");
}
Also used : Node(org.apache.hadoop.hive.ql.lib.Node) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) ArrayList(java.util.ArrayList) Dispatcher(org.apache.hadoop.hive.ql.lib.Dispatcher) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) LinkedHashMap(java.util.LinkedHashMap) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) GraphWalker(org.apache.hadoop.hive.ql.lib.GraphWalker) NodeProcessor(org.apache.hadoop.hive.ql.lib.NodeProcessor) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) RuleRegExp(org.apache.hadoop.hive.ql.lib.RuleRegExp) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) ReduceSinkMapJoinProc(org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc) CompositeProcessor(org.apache.hadoop.hive.ql.lib.CompositeProcessor) MergeJoinProc(org.apache.hadoop.hive.ql.optimizer.MergeJoinProc) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) Rule(org.apache.hadoop.hive.ql.lib.Rule)

Example 10 with BaseWork

use of org.apache.hadoop.hive.ql.plan.BaseWork in project hive by apache.

the class TezCompiler method setInputFormat.

/**
 * Recursively applies {@code setInputFormat(MapWork, Operator)} to the task tree rooted
 * at {@code task}.
 *
 * <p>For a {@link TezTask}, every operator in the alias-to-work map of each
 * {@link MapWork} is visited. For a {@link ConditionalTask}, each listed task is
 * processed recursively. In all cases the child tasks, if any, are processed afterwards.
 *
 * @param task the task whose works, and whose descendants' works, are to be processed
 */
@Override
protected void setInputFormat(Task<? extends Serializable> task) {
    if (task instanceof TezTask) {
        TezWork tezWork = ((TezTask) task).getWork();
        for (BaseWork unit : tezWork.getAllWork()) {
            if (!(unit instanceof MapWork)) {
                continue;
            }
            MapWork mapWork = (MapWork) unit;
            // Iterating the values of an empty alias map does nothing, so no
            // separate emptiness check is required.
            for (Operator<? extends OperatorDesc> rootOp : mapWork.getAliasToWork().values()) {
                setInputFormat(mapWork, rootOp);
            }
        }
    } else if (task instanceof ConditionalTask) {
        for (Task<? extends Serializable> branch : ((ConditionalTask) task).getListTasks()) {
            setInputFormat(branch);
        }
    }
    // Descend into the children regardless of the task's own type.
    if (task.getChildTasks() == null) {
        return;
    }
    for (Task<? extends Serializable> child : task.getChildTasks()) {
        setInputFormat(child);
    }
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) Task(org.apache.hadoop.hive.ql.exec.Task) Serializable(java.io.Serializable) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) TezWork(org.apache.hadoop.hive.ql.plan.TezWork)

Aggregations

BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)54 ArrayList (java.util.ArrayList)16 Operator (org.apache.hadoop.hive.ql.exec.Operator)14 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)14 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)11 ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork)11 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)10 LinkedList (java.util.LinkedList)9 HashTableDummyOperator (org.apache.hadoop.hive.ql.exec.HashTableDummyOperator)9 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)9 TezWork (org.apache.hadoop.hive.ql.plan.TezWork)9 List (java.util.List)8 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)8 JobConf (org.apache.hadoop.mapred.JobConf)8 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)7 SparkEdgeProperty (org.apache.hadoop.hive.ql.plan.SparkEdgeProperty)7 SparkWork (org.apache.hadoop.hive.ql.plan.SparkWork)7 CommonMergeJoinOperator (org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator)6 DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator)6 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)6