Use of org.apache.hadoop.hive.ql.exec.mr.ExecDriver in project hive by apache.
In the class MapReduceCompiler, the method setInputFormat:
// loop over all the tasks recursively
@Override
protected void setInputFormat(Task<? extends Serializable> task) {
  if (task instanceof ExecDriver) {
    MapWork work = ((MapredWork) task.getWork()).getMapWork();
    HashMap<String, Operator<? extends OperatorDesc>> opMap = work.getAliasToWork();
    if (!opMap.isEmpty()) {
      for (Operator<? extends OperatorDesc> op : opMap.values()) {
        setInputFormat(work, op);
      }
    }
  } else if (task instanceof ConditionalTask) {
    List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task).getListTasks();
    for (Task<? extends Serializable> tsk : listTasks) {
      setInputFormat(tsk);
    }
  }
  if (task.getChildTasks() != null) {
    for (Task<? extends Serializable> childTask : task.getChildTasks()) {
      setInputFormat(childTask);
    }
  }
}
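Note that this traversal keeps no visited set, so in a DAG where two branches rejoin at the same child, that child is visited once per path (presumably harmless here because setting the input format is idempotent). Below is a minimal, self-contained sketch of the same recursive pattern using stand-in classes rather than Hive's Task API; TaskNode, branches, and children are invented names for illustration only:

import java.util.ArrayList;
import java.util.List;

// Stand-in for a task node; conditional branches and child tasks are modeled
// as two separate lists, mirroring getListTasks()/getChildTasks() above.
class TaskNode {
  final String id;
  final List<TaskNode> branches = new ArrayList<>(); // conditional branches
  final List<TaskNode> children = new ArrayList<>(); // downstream tasks
  TaskNode(String id) { this.id = id; }
}

public class RecursiveWalkSketch {
  static void walk(TaskNode task) {
    System.out.println("visiting " + task.id);
    for (TaskNode branch : task.branches) {
      walk(branch); // descend into each conditional branch first
    }
    for (TaskNode child : task.children) {
      walk(child); // then descend into child tasks
    }
  }

  public static void main(String[] args) {
    TaskNode root = new TaskNode("root");
    TaskNode left = new TaskNode("left");
    TaskNode right = new TaskNode("right");
    TaskNode join = new TaskNode("join");
    root.children.add(left);
    root.children.add(right);
    left.children.add(join);
    right.children.add(join); // diamond: "join" is printed twice
    walk(root);
  }
}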
Use of org.apache.hadoop.hive.ql.exec.mr.ExecDriver in project hive by apache.
In the class QueryPlan, the method populateQueryPlan:
/**
 * Populate api.QueryPlan from exec structures. This includes constructing the
 * dependency graphs of stages and operators.
 *
 * @throws IOException
 */
private void populateQueryPlan() throws IOException {
  query.setStageGraph(new org.apache.hadoop.hive.ql.plan.api.Graph());
  query.getStageGraph().setNodeType(NodeType.STAGE);
  Queue<Task<?>> tasksToVisit = new LinkedList<Task<?>>();
  Set<Task<?>> tasksVisited = new HashSet<Task<?>>();
  tasksToVisit.addAll(rootTasks);
  while (tasksToVisit.size() != 0) {
    Task<?> task = tasksToVisit.remove();
    tasksVisited.add(task);
    // populate the stage for this task
    org.apache.hadoop.hive.ql.plan.api.Stage stage = new org.apache.hadoop.hive.ql.plan.api.Stage();
    stage.setStageId(task.getId());
    stage.setStageType(task.getType());
    query.addToStageList(stage);
    if (task instanceof ExecDriver) {
      // populate the map-side task
      ExecDriver mrTask = (ExecDriver) task;
      org.apache.hadoop.hive.ql.plan.api.Task mapTask = new org.apache.hadoop.hive.ql.plan.api.Task();
      mapTask.setTaskId(stage.getStageId() + "_MAP");
      mapTask.setTaskType(TaskType.MAP);
      stage.addToTaskList(mapTask);
      populateOperatorGraph(mapTask, mrTask.getWork().getMapWork().getAliasToWork().values());
      // populate the reduce-side task, if any
      if (mrTask.hasReduce()) {
        org.apache.hadoop.hive.ql.plan.api.Task reduceTask = new org.apache.hadoop.hive.ql.plan.api.Task();
        reduceTask.setTaskId(stage.getStageId() + "_REDUCE");
        reduceTask.setTaskType(TaskType.REDUCE);
        stage.addToTaskList(reduceTask);
        Collection<Operator<? extends OperatorDesc>> reducerTopOps = new ArrayList<Operator<? extends OperatorDesc>>();
        reducerTopOps.add(mrTask.getWork().getReduceWork().getReducer());
        populateOperatorGraph(reduceTask, reducerTopOps);
      }
    } else {
      org.apache.hadoop.hive.ql.plan.api.Task otherTask = new org.apache.hadoop.hive.ql.plan.api.Task();
      otherTask.setTaskId(stage.getStageId() + "_OTHER");
      otherTask.setTaskType(TaskType.OTHER);
      stage.addToTaskList(otherTask);
    }
    if (task instanceof ConditionalTask) {
      org.apache.hadoop.hive.ql.plan.api.Adjacency listEntry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
      listEntry.setAdjacencyType(AdjacencyType.DISJUNCTIVE);
      listEntry.setNode(task.getId());
      ConditionalTask t = (ConditionalTask) task;
      for (Task<?> listTask : t.getListTasks()) {
        if (t.getChildTasks() != null) {
          org.apache.hadoop.hive.ql.plan.api.Adjacency childEntry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
          childEntry.setAdjacencyType(AdjacencyType.DISJUNCTIVE);
          childEntry.setNode(listTask.getId());
          // link each branch task to the conditional task's children and
          // enqueue any child not yet visited
          for (Task<?> childTask : t.getChildTasks()) {
            childEntry.addToChildren(childTask.getId());
            if (!tasksVisited.contains(childTask)) {
              tasksToVisit.add(childTask);
            }
          }
          query.getStageGraph().addToAdjacencyList(childEntry);
        }
        listEntry.addToChildren(listTask.getId());
        if (!tasksVisited.contains(listTask)) {
          tasksToVisit.add(listTask);
        }
      }
      query.getStageGraph().addToAdjacencyList(listEntry);
    } else if (task.getChildTasks() != null) {
      org.apache.hadoop.hive.ql.plan.api.Adjacency entry = new org.apache.hadoop.hive.ql.plan.api.Adjacency();
      entry.setAdjacencyType(AdjacencyType.CONJUNCTIVE);
      entry.setNode(task.getId());
      // link the task to its children and enqueue any child not yet visited
      for (Task<?> childTask : task.getChildTasks()) {
        entry.addToChildren(childTask.getId());
        if (!tasksVisited.contains(childTask)) {
          tasksToVisit.add(childTask);
        }
      }
      query.getStageGraph().addToAdjacencyList(entry);
    }
  }
}
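The method above is a breadth-first walk with a visited set: tasks are pulled off a queue and children are enqueued only if they have not yet been visited. Below is a self-contained sketch of that pattern with stand-in types, not Hive's API; it adds a guard at dequeue time as well, since in a diamond-shaped DAG a task can be enqueued by both of its parents before it is ever marked visited:

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

public class StageGraphWalkSketch {
  // Returns the order in which stages would be emitted; 'children' maps a
  // task id to its child task ids, 'roots' are the plan's root tasks.
  static List<String> walk(Map<String, List<String>> children, List<String> roots) {
    Queue<String> toVisit = new ArrayDeque<>(roots);
    Set<String> visited = new HashSet<>();
    List<String> emitted = new ArrayList<>();
    while (!toVisit.isEmpty()) {
      String task = toVisit.remove();
      if (!visited.add(task)) {
        continue; // both parents of a diamond may have enqueued this task
      }
      emitted.add(task); // "populate stage" happens here in the real method
      for (String child : children.getOrDefault(task, List.of())) {
        if (!visited.contains(child)) {
          toVisit.add(child);
        }
      }
    }
    return emitted;
  }

  public static void main(String[] args) {
    // Diamond: root -> {a, b}, a -> sink, b -> sink
    Map<String, List<String>> g = Map.of(
        "root", List.of("a", "b"),
        "a", List.of("sink"),
        "b", List.of("sink"));
    System.out.println(walk(g, List.of("root"))); // [root, a, b, sink]
  }
}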
Use of org.apache.hadoop.hive.ql.exec.mr.ExecDriver in project hive by apache.
In the class MapReduceCompiler, the method setInputFormat (a variant of the snippet above, using Task<?> rather than Task<? extends Serializable>):
// loop over all the tasks recursively
@Override
protected void setInputFormat(Task<?> task) {
  if (task instanceof ExecDriver) {
    MapWork work = ((MapredWork) task.getWork()).getMapWork();
    Map<String, Operator<? extends OperatorDesc>> opMap = work.getAliasToWork();
    if (!opMap.isEmpty()) {
      for (Operator<? extends OperatorDesc> op : opMap.values()) {
        setInputFormat(work, op);
      }
    }
  } else if (task instanceof ConditionalTask) {
    List<Task<?>> listTasks = ((ConditionalTask) task).getListTasks();
    for (Task<?> tsk : listTasks) {
      setInputFormat(tsk);
    }
  }
  if (task.getChildTasks() != null) {
    for (Task<?> childTask : task.getChildTasks()) {
      setInputFormat(childTask);
    }
  }
}
Use of org.apache.hadoop.hive.ql.exec.mr.ExecDriver in project hive by apache.
In the class TestUtilities, the method testGetTasksHaveNoRepeats:
/**
 * Verifies that the Utilities.get*Tasks methods do not repeat themselves while
 * extracting tasks from a given set of root tasks, even for DAGs with multiple
 * paths to the same node, such as the diamond-shaped DAGs common to replication.
 */
@Test
public void testGetTasksHaveNoRepeats() {
  CountingWrappingTask mrTask = new CountingWrappingTask(new ExecDriver());
  CountingWrappingTask tezTask = new CountingWrappingTask(new TezTask());
  CountingWrappingTask sparkTask = new CountingWrappingTask(new SparkTask());
  // First check - we should not have repeats in the results
  assertEquals("No repeated MRTasks from Utilities.getMRTasks", 1,
      Utilities.getMRTasks(getTestDiamondTaskGraph(mrTask)).size());
  assertEquals("No repeated TezTasks from Utilities.getTezTasks", 1,
      Utilities.getTezTasks(getTestDiamondTaskGraph(tezTask)).size());
  assertEquals("No repeated SparkTasks from Utilities.getSparkTasks", 1,
      Utilities.getSparkTasks(getTestDiamondTaskGraph(sparkTask)).size());
  // Second check - each task must not have been accessed more than once by the
  // traversal. The expected count is 2 because every visit touches the task
  // twice: once to check for existence and once to actually visit it.
  assertEquals("MRTasks should have been visited only once", 2, mrTask.getDepCallCount());
  assertEquals("TezTasks should have been visited only once", 2, tezTask.getDepCallCount());
  assertEquals("SparkTasks should have been visited only once", 2, sparkTask.getDepCallCount());
}
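The helper getTestDiamondTaskGraph is not shown in this snippet, so its exact shape is an assumption; the sketch below builds an analogous diamond out of stand-in nodes and demonstrates the visited-set extraction behavior the assertions rely on: the task at the diamond's join point is collected once, not once per path.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class DiamondExtractSketch {
  // Stand-in task node; 'kind' plays the role of the instanceof checks in
  // Utilities.getMRTasks/getTezTasks/getSparkTasks.
  static class Node {
    final String kind;
    final List<Node> children = new ArrayList<>();
    Node(String kind) { this.kind = kind; }
  }

  static List<Node> collect(List<Node> roots, String wantedKind) {
    List<Node> found = new ArrayList<>();
    Set<Node> seen = new HashSet<>();
    Deque<Node> stack = new ArrayDeque<>(roots);
    while (!stack.isEmpty()) {
      Node task = stack.pop();
      if (!seen.add(task)) {
        continue; // join point of the diamond: reachable twice, visited once
      }
      if (task.kind.equals(wantedKind)) {
        found.add(task);
      }
      stack.addAll(task.children);
    }
    return found;
  }

  public static void main(String[] args) {
    Node root = new Node("root"), left = new Node("dep"),
        right = new Node("dep"), mr = new Node("mr");
    root.children.add(left);
    root.children.add(right);
    left.children.add(mr);
    right.children.add(mr); // diamond: "mr" is reachable via two paths
    System.out.println(collect(List.of(root), "mr").size()); // prints 1
  }
}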
Use of org.apache.hadoop.hive.ql.exec.mr.ExecDriver in project hive by apache.
In the class TestHiveProtoLoggingHook, the method testQueueLogs:
@Test
public void testQueueLogs() throws Exception {
  context.setHookType(HookType.PRE_EXEC_HOOK);
  EventLogger evtLogger = new EventLogger(conf, SystemClock.getInstance());
  // An ExecDriver root task makes this an MR query
  context.getQueryPlan().getRootTasks().add(new ExecDriver());
  evtLogger.handle(context);
  // A TezTask root task makes this a Tez query
  MapWork mapWork = new MapWork();
  TezWork tezWork = new TezWork("test_queryid");
  tezWork.add(mapWork);
  TezTask task = new TezTask();
  task.setId("id1");
  task.setWork(tezWork);
  context.getQueryPlan().getRootTasks().add(task);
  context.getQueryPlan().getRootTasks().add(new TezTask());
  evtLogger.handle(context);
  // Enabling LLAP mode on the map work makes this an LLAP query
  mapWork.setLlapMode(true);
  evtLogger.handle(context);
  evtLogger.shutdown();
  ProtoMessageReader<HiveHookEventProto> reader = getTestReader(conf, tmpFolder);
  HiveHookEventProto event = reader.readEvent();
  Assert.assertNotNull(event);
  Assert.assertEquals(ExecutionMode.MR.name(), event.getExecutionMode());
  Assert.assertEquals("mr_queue", event.getQueue());
  event = reader.readEvent();
  Assert.assertNotNull(event);
  Assert.assertEquals(ExecutionMode.TEZ.name(), event.getExecutionMode());
  Assert.assertEquals("tez_queue", event.getQueue());
  event = reader.readEvent();
  Assert.assertNotNull(event);
  Assert.assertEquals(ExecutionMode.LLAP.name(), event.getExecutionMode());
  Assert.assertEquals("llap_queue", event.getQueue());
}
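The three handle() calls above imply a precedence when the hook classifies the query's execution mode: LLAP over Tez over MR. Below is a minimal sketch of that precedence with stand-in names; the actual classification logic inside HiveProtoLoggingHook may differ in detail and also covers modes (such as Spark) not exercised by this test:

public class ExecutionModeSketch {
  enum Mode { NONE, MR, TEZ, LLAP }

  // hasMrTask/hasTezTask/llapMode stand in for inspecting the plan's root
  // tasks (ExecDriver vs. TezTask) and the LLAP flag on the Tez map work.
  static Mode classify(boolean hasMrTask, boolean hasTezTask, boolean llapMode) {
    if (hasTezTask && llapMode) {
      return Mode.LLAP; // LLAP-mode map work wins over plain Tez
    }
    if (hasTezTask) {
      return Mode.TEZ;  // any Tez root task wins over MR
    }
    if (hasMrTask) {
      return Mode.MR;
    }
    return Mode.NONE;
  }

  public static void main(String[] args) {
    System.out.println(classify(true, false, false)); // MR   (first handle call)
    System.out.println(classify(true, true, false));  // TEZ  (second handle call)
    System.out.println(classify(true, true, true));   // LLAP (third handle call)
  }
}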