
Example 1 with ExecDriver

Use of org.apache.hadoop.hive.ql.exec.mr.ExecDriver in project hive by apache.

From class MapReduceCompiler, method setInputFormat.

// loop over all the tasks recursively
@Override
protected void setInputFormat(Task<? extends Serializable> task) {
    if (task instanceof ExecDriver) {
        MapWork work = ((MapredWork) task.getWork()).getMapWork();
        HashMap<String, Operator<? extends OperatorDesc>> opMap = work.getAliasToWork();
        if (!opMap.isEmpty()) {
            for (Operator<? extends OperatorDesc> op : opMap.values()) {
                setInputFormat(work, op);
            }
        }
    } else if (task instanceof ConditionalTask) {
        List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task).getListTasks();
        for (Task<? extends Serializable> tsk : listTasks) {
            setInputFormat(tsk);
        }
    }
    if (task.getChildTasks() != null) {
        for (Task<? extends Serializable> childTask : task.getChildTasks()) {
            setInputFormat(childTask);
        }
    }
}
Also used: ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) GenMROperator(org.apache.hadoop.hive.ql.optimizer.GenMROperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) Task(org.apache.hadoop.hive.ql.exec.Task) Serializable(java.io.Serializable) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) ExecDriver(org.apache.hadoop.hive.ql.exec.mr.ExecDriver) List(java.util.List) ArrayList(java.util.ArrayList) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
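
Note: the walk above recurses into a ConditionalTask's branch tasks and then into child tasks, but keeps no visited set, so on a diamond-shaped DAG (compare Example 4, which tests exactly this property of Utilities.get*Tasks) the same task can be processed more than once. Below is a minimal, self-contained sketch of the same recursive pattern with a dedup guard added; the Node type is a hypothetical stand-in for Hive's Task hierarchy, not Hive API.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Hypothetical stand-in for Hive's Task hierarchy; illustration only.
class Node {
    final String id;
    final List<Node> children = new ArrayList<>();
    Node(String id) { this.id = id; }
}

public class TaskWalk {
    // Same shape as setInputFormat's recursion, plus a visited set so a
    // diamond-shaped graph does not cause repeated processing.
    static void walk(Node node, Set<Node> visited, List<String> order) {
        if (!visited.add(node)) {
            return; // already handled via another parent
        }
        order.add(node.id); // stands in for the per-task work (setInputFormat)
        for (Node child : node.children) {
            walk(child, visited, order);
        }
    }

    public static void main(String[] args) {
        // root -> a -> leaf and root -> b -> leaf: a diamond
        Node root = new Node("root"), a = new Node("a"), b = new Node("b"), leaf = new Node("leaf");
        root.children.add(a);
        root.children.add(b);
        a.children.add(leaf);
        b.children.add(leaf);
        List<String> order = new ArrayList<>();
        walk(root, new HashSet<>(), order);
        System.out.println(order); // [root, a, leaf, b]: leaf visited once
    }
}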

Example 2 with ExecDriver

Use of org.apache.hadoop.hive.ql.exec.mr.ExecDriver in project hive by apache.

From class QueryPlan, method populateQueryPlan.

/**
 * Populate api.QueryPlan from exec structures. This includes constructing the
 * dependency graphs of stages and operators.
 *
 * @throws IOException
 */
private void populateQueryPlan() throws IOException {
    query.setStageGraph(new org.apache.hadoop.hive.ql.plan.api.Graph());
    query.getStageGraph().setNodeType(NodeType.STAGE);
    Queue<Task<?>> tasksToVisit = new LinkedList<Task<?>>();
    Set<Task<?>> tasksVisited = new HashSet<Task<?>>();
    tasksToVisit.addAll(rootTasks);
    while (!tasksToVisit.isEmpty()) {
        Task<?> task = tasksToVisit.remove();
        tasksVisited.add(task);
        // populate stage
        org.apache.hadoop.hive.ql.plan.api.Stage stage = new org.apache.hadoop.hive.ql.plan.api.Stage();
        stage.setStageId(task.getId());
        stage.setStageType(task.getType());
        query.addToStageList(stage);
        if (task instanceof ExecDriver) {
            // populate map task
            ExecDriver mrTask = (ExecDriver) task;
            org.apache.hadoop.hive.ql.plan.api.Task mapTask = new org.apache.hadoop.hive.ql.plan.api.Task();
            mapTask.setTaskId(stage.getStageId() + "_MAP");
            mapTask.setTaskType(TaskType.MAP);
            stage.addToTaskList(mapTask);
            populateOperatorGraph(mapTask, mrTask.getWork().getMapWork().getAliasToWork().values());
            // populate reduce task
            if (mrTask.hasReduce()) {
                org.apache.hadoop.hive.ql.plan.api.Task reduceTask = new org.apache.hadoop.hive.ql.plan.api.Task();
                reduceTask.setTaskId(stage.getStageId() + "_REDUCE");
                reduceTask.setTaskType(TaskType.REDUCE);
                stage.addToTaskList(reduceTask);
                Collection<Operator<? extends OperatorDesc>> reducerTopOps = new ArrayList<Operator<? extends OperatorDesc>>();
                reducerTopOps.add(mrTask.getWork().getReduceWork().getReducer());
                populateOperatorGraph(reduceTask, reducerTopOps);
            }
        } else {
            org.apache.hadoop.hive.ql.plan.api.Task otherTask = new org.apache.hadoop.hive.ql.plan.api.Task();
            otherTask.setTaskId(stage.getStageId() + "_OTHER");
            otherTask.setTaskType(TaskType.OTHER);
            stage.addToTaskList(otherTask);
        }
        if (task instanceof ConditionalTask) {
            org.apache.hadoop.hive.ql.plan.api.Adjacency listEntry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
            listEntry.setAdjacencyType(AdjacencyType.DISJUNCTIVE);
            listEntry.setNode(task.getId());
            ConditionalTask t = (ConditionalTask) task;
            for (Task<?> listTask : t.getListTasks()) {
                if (t.getChildTasks() != null) {
                    org.apache.hadoop.hive.ql.plan.api.Adjacency childEntry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
                    childEntry.setAdjacencyType(AdjacencyType.DISJUNCTIVE);
                    childEntry.setNode(listTask.getId());
                    // add edges from this branch task to the conditional's
                    // children, enqueueing any child not yet visited
                    for (Task<?> childTask : t.getChildTasks()) {
                        childEntry.addToChildren(childTask.getId());
                        if (!tasksVisited.contains(childTask)) {
                            tasksToVisit.add(childTask);
                        }
                    }
                    query.getStageGraph().addToAdjacencyList(childEntry);
                }
                listEntry.addToChildren(listTask.getId());
                if (!tasksVisited.contains(listTask)) {
                    tasksToVisit.add(listTask);
                }
            }
            query.getStageGraph().addToAdjacencyList(listEntry);
        } else if (task.getChildTasks() != null) {
            org.apache.hadoop.hive.ql.plan.api.Adjacency entry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
            entry.setAdjacencyType(AdjacencyType.CONJUNCTIVE);
            entry.setNode(task.getId());
            // add edges to this task's children, enqueueing any not yet visited
            for (Task<?> childTask : task.getChildTasks()) {
                entry.addToChildren(childTask.getId());
                if (!tasksVisited.contains(childTask)) {
                    tasksToVisit.add(childTask);
                }
            }
            query.getStageGraph().addToAdjacencyList(entry);
        }
    }
}
Also used: Operator(org.apache.hadoop.hive.ql.exec.Operator) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) ExplainTask(org.apache.hadoop.hive.ql.exec.ExplainTask) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) LinkedList(java.util.LinkedList) ExecDriver(org.apache.hadoop.hive.ql.exec.mr.ExecDriver) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
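
Note: populateQueryPlan walks the task DAG breadth-first with an explicit queue and a visited set, emitting one Adjacency entry per task that has children (DISJUNCTIVE for a ConditionalTask's branch list, CONJUNCTIVE for ordinary child edges). Below is a minimal sketch of that queue-plus-visited pattern in isolation, using hypothetical types in place of the Thrift-generated api classes; it guards at dequeue time, so a node enqueued via two parents is still expanded only once.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

// Hypothetical node type; in Hive this would be a Task<?>.
class Stage {
    final String id;
    final List<Stage> children = new ArrayList<>();
    Stage(String id) { this.id = id; }
}

public class StageGraphSketch {
    // Returns parent id -> child ids, expanding each node exactly once.
    static Map<String, List<String>> adjacency(List<Stage> roots) {
        Map<String, List<String>> adj = new LinkedHashMap<>();
        Queue<Stage> toVisit = new LinkedList<>(roots);
        Set<Stage> visited = new HashSet<>();
        while (!toVisit.isEmpty()) {
            Stage s = toVisit.remove();
            if (!visited.add(s)) {
                continue; // reachable through more than one parent
            }
            List<String> childIds = new ArrayList<>();
            for (Stage child : s.children) {
                childIds.add(child.id);
                toVisit.add(child);
            }
            if (!childIds.isEmpty()) {
                adj.put(s.id, childIds); // one adjacency entry per parent
            }
        }
        return adj;
    }
}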

Example 3 with ExecDriver

Use of org.apache.hadoop.hive.ql.exec.mr.ExecDriver in project hive by apache.

From class MapReduceCompiler, method setInputFormat. This is the same method as Example 1, taken from a later revision in which the Task type parameter drops the Serializable bound (Task<?> instead of Task<? extends Serializable>).

// loop over all the tasks recursively
@Override
protected void setInputFormat(Task<?> task) {
    if (task instanceof ExecDriver) {
        MapWork work = ((MapredWork) task.getWork()).getMapWork();
        Map<String, Operator<? extends OperatorDesc>> opMap = work.getAliasToWork();
        if (!opMap.isEmpty()) {
            for (Operator<? extends OperatorDesc> op : opMap.values()) {
                setInputFormat(work, op);
            }
        }
    } else if (task instanceof ConditionalTask) {
        List<Task<?>> listTasks = ((ConditionalTask) task).getListTasks();
        for (Task<?> tsk : listTasks) {
            setInputFormat(tsk);
        }
    }
    if (task.getChildTasks() != null) {
        for (Task<?> childTask : task.getChildTasks()) {
            setInputFormat(childTask);
        }
    }
}
Also used: ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) GenMROperator(org.apache.hadoop.hive.ql.optimizer.GenMROperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) Task(org.apache.hadoop.hive.ql.exec.Task) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) ExecDriver(org.apache.hadoop.hive.ql.exec.mr.ExecDriver) List(java.util.List) ArrayList(java.util.ArrayList) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 4 with ExecDriver

Use of org.apache.hadoop.hive.ql.exec.mr.ExecDriver in project hive by apache.

From class TestUtilities, method testGetTasksHaveNoRepeats.

/**
 * Tests that Utilities.get*Tasks do not repeat themselves when extracting
 * tasks from a given set of root tasks whose DAGs can have multiple paths,
 * such as the diamond-shaped DAGs common to replication.
 */
@Test
public void testGetTasksHaveNoRepeats() {
    CountingWrappingTask mrTask = new CountingWrappingTask(new ExecDriver());
    CountingWrappingTask tezTask = new CountingWrappingTask(new TezTask());
    CountingWrappingTask sparkTask = new CountingWrappingTask(new SparkTask());
    // First check - we should not have repeats in results
    assertEquals("No repeated MRTasks from Utilities.getMRTasks", 1, Utilities.getMRTasks(getTestDiamondTaskGraph(mrTask)).size());
    assertEquals("No repeated TezTasks from Utilities.getTezTasks", 1, Utilities.getTezTasks(getTestDiamondTaskGraph(tezTask)).size());
    assertEquals("No repeated TezTasks from Utilities.getSparkTasks", 1, Utilities.getSparkTasks(getTestDiamondTaskGraph(sparkTask)).size());
    // Second check - the tasks we looked for must not have been accessed more
    // than once by the traversal. (Note that each visit actually counts twice:
    // once to check for existence, and once to visit.)
    assertEquals("MRTasks should have been visited only once", 2, mrTask.getDepCallCount());
    assertEquals("TezTasks should have been visited only once", 2, tezTask.getDepCallCount());
    assertEquals("SparkTasks should have been visited only once", 2, sparkTask.getDepCallCount());
}
Also used: SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ExecDriver(org.apache.hadoop.hive.ql.exec.mr.ExecDriver) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) Test(org.junit.Test)
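
Note: CountingWrappingTask and getTestDiamondTaskGraph are defined elsewhere in TestUtilities and are not part of this listing. The idea behind the counting wrapper is a decorator that increments a counter every time its dependency list is read, which is what lets the test assert how often a traversal touched each node. A hypothetical, Hive-free sketch of that pattern (names here are illustrative):

import java.util.ArrayList;
import java.util.List;

// Hypothetical counting decorator; Hive's CountingWrappingTask wraps a
// real Task<?> and counts reads of its dependency list in the same spirit.
class CountingNode {
    private final List<CountingNode> children = new ArrayList<>();
    private int depCallCount = 0;

    List<CountingNode> getChildren() {
        depCallCount++; // each traversal that asks for dependencies is counted
        return children;
    }

    void addChild(CountingNode child) {
        children.add(child);
    }

    int getDepCallCount() {
        return depCallCount;
    }
}

In a diamond-shaped graph, a traversal that lacks a visited set reads the merge node's dependencies once per incoming path, so the counter exposes repeated visits.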

Example 5 with ExecDriver

Use of org.apache.hadoop.hive.ql.exec.mr.ExecDriver in project hive by apache.

From class TestHiveProtoLoggingHook, method testQueueLogs.

@Test
public void testQueueLogs() throws Exception {
    context.setHookType(HookType.PRE_EXEC_HOOK);
    EventLogger evtLogger = new EventLogger(conf, SystemClock.getInstance());
    // This makes it an MR task
    context.getQueryPlan().getRootTasks().add(new ExecDriver());
    evtLogger.handle(context);
    // This makes it a Tez task
    MapWork mapWork = new MapWork();
    TezWork tezWork = new TezWork("test_queryid");
    tezWork.add(mapWork);
    TezTask task = new TezTask();
    task.setId("id1");
    task.setWork(tezWork);
    context.getQueryPlan().getRootTasks().add(task);
    context.getQueryPlan().getRootTasks().add(new TezTask());
    evtLogger.handle(context);
    // This makes it an LLAP task
    mapWork.setLlapMode(true);
    evtLogger.handle(context);
    evtLogger.shutdown();
    ProtoMessageReader<HiveHookEventProto> reader = getTestReader(conf, tmpFolder);
    HiveHookEventProto event = reader.readEvent();
    Assert.assertNotNull(event);
    Assert.assertEquals(ExecutionMode.MR.name(), event.getExecutionMode());
    Assert.assertEquals("mr_queue", event.getQueue());
    event = reader.readEvent();
    Assert.assertNotNull(event);
    Assert.assertEquals(ExecutionMode.TEZ.name(), event.getExecutionMode());
    Assert.assertEquals("tez_queue", event.getQueue());
    event = reader.readEvent();
    Assert.assertNotNull(event);
    Assert.assertEquals(ExecutionMode.LLAP.name(), event.getExecutionMode());
    Assert.assertEquals("llap_queue", event.getQueue());
}
Also used : MapWork(org.apache.hadoop.hive.ql.plan.MapWork) EventLogger(org.apache.hadoop.hive.ql.hooks.HiveProtoLoggingHook.EventLogger) ExecDriver(org.apache.hadoop.hive.ql.exec.mr.ExecDriver) HiveHookEventProto(org.apache.hadoop.hive.ql.hooks.proto.HiveHookEvents.HiveHookEventProto) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) TezWork(org.apache.hadoop.hive.ql.plan.TezWork) Test(org.junit.Test)
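
Note: the test drives EventLogger with three plan shapes and expects the execution mode to be derived from the root tasks: an ExecDriver marks the query as MR, a TezTask as TEZ, and a TezTask whose TezWork contains MapWork with LLAP mode enabled as LLAP (the queue names come from the test's conf setup, which is not shown in this listing). A rough restatement of that classification rule follows; this is an illustration of what the assertions exercise, not Hive's actual implementation, which lives in HiveProtoLoggingHook.

// Illustrative only: a simplified restatement of the mode-selection rule
// the assertions above exercise; Hive's real logic inspects the plan's
// root tasks and work objects directly.
enum ExecutionModeSketch { MR, TEZ, LLAP, NONE }

final class ModeRule {
    static ExecutionModeSketch classify(boolean hasMrTask, boolean hasTezTask, boolean llapMode) {
        if (hasTezTask) {
            return llapMode ? ExecutionModeSketch.LLAP : ExecutionModeSketch.TEZ;
        }
        return hasMrTask ? ExecutionModeSketch.MR : ExecutionModeSketch.NONE;
    }
}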

Aggregations

ExecDriver (org.apache.hadoop.hive.ql.exec.mr.ExecDriver): 10
ArrayList (java.util.ArrayList): 4
ConditionalTask (org.apache.hadoop.hive.ql.exec.ConditionalTask): 4
Task (org.apache.hadoop.hive.ql.exec.Task): 4
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 4
Operator (org.apache.hadoop.hive.ql.exec.Operator): 3
TezTask (org.apache.hadoop.hive.ql.exec.tez.TezTask): 3
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 3
IOException (java.io.IOException): 2
HashSet (java.util.HashSet): 2
LinkedList (java.util.LinkedList): 2
List (java.util.List): 2
ContentSummary (org.apache.hadoop.fs.ContentSummary): 2
Path (org.apache.hadoop.fs.Path): 2
PathFilter (org.apache.hadoop.fs.PathFilter): 2
Context (org.apache.hadoop.hive.ql.Context): 2
ExplainTask (org.apache.hadoop.hive.ql.exec.ExplainTask): 2
FetchTask (org.apache.hadoop.hive.ql.exec.FetchTask): 2
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 2
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 2