use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.
the class SparkCrossProductCheck method dispatch.
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
  @SuppressWarnings("unchecked")
  Task<?> currTask = (Task<?>) nd;
  if (currTask instanceof SparkTask) {
    SparkWork sparkWork = ((SparkTask) currTask).getWork();
    checkShuffleJoin(sparkWork);
    checkMapJoin((SparkTask) currTask);
  } else if (currTask instanceof ConditionalTask) {
    List<Task<?>> taskList = ((ConditionalTask) currTask).getListTasks();
    for (Task<?> task : taskList) {
      dispatch(task, stack, nodeOutputs);
    }
  }
  return null;
}
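The pattern worth noting here is that a ConditionalTask contributes no Spark work of its own: the check simply re-dispatches each task returned by getListTasks(). Below is a minimal sketch of that unwrapping step in isolation, assuming only the ConditionalTask and Task APIs used above; ConditionalTaskUtils and flattenConditional are illustrative names, not part of Hive, and the exact generic signature of getListTasks() varies across Hive versions.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.Task;

public final class ConditionalTaskUtils {

  private ConditionalTaskUtils() {
  }

  /** Collects the concrete (non-conditional) tasks reachable from the given task. */
  public static List<Task<?>> flattenConditional(Task<?> task) {
    List<Task<?>> result = new ArrayList<>();
    if (task instanceof ConditionalTask) {
      // a ConditionalTask is only a wrapper: recurse into its candidate list tasks
      for (Task<?> child : ((ConditionalTask) task).getListTasks()) {
        result.addAll(flattenConditional(child));
      }
    } else {
      result.add(task);
    }
    return result;
  }
}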
use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.
the class AbstractJoinTaskDispatcher method dispatch.
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
  if (nodeOutputs == null || nodeOutputs.length == 0) {
    throw new SemanticException("No Dispatch Context");
  }
  TaskGraphWalkerContext walkerCtx = (TaskGraphWalkerContext) nodeOutputs[0];
  Task<?> currTask = (Task<?>) nd;
  // not map reduce task or not conditional task, just skip
  if (currTask.isMapRedTask()) {
    if (currTask instanceof ConditionalTask) {
      // get the list of task
      List<Task<?>> taskList = ((ConditionalTask) currTask).getListTasks();
      for (Task<?> tsk : taskList) {
        if (tsk.isMapRedTask()) {
          Task<?> newTask = this.processCurrentTask((MapRedTask) tsk, ((ConditionalTask) currTask),
              physicalContext.getContext());
          walkerCtx.addToDispatchList(newTask);
        }
      }
    } else {
      Task<?> newTask = this.processCurrentTask((MapRedTask) currTask, null, physicalContext.getContext());
      walkerCtx.addToDispatchList(newTask);
    }
  }
  return null;
}
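For context, AbstractJoinTaskDispatcher leaves processCurrentTask abstract and the concrete join optimizers supply it; in Hive these are CommonJoinTaskDispatcher and SortMergeJoinTaskDispatcher. The sketch below shows a do-nothing subclass, assuming the constructor and the processCurrentTask signature implied by the calls above; LoggingJoinTaskDispatcher is a hypothetical name used only for illustration.

import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.optimizer.physical.AbstractJoinTaskDispatcher;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class LoggingJoinTaskDispatcher extends AbstractJoinTaskDispatcher {

  public LoggingJoinTaskDispatcher(PhysicalContext context) {
    super(context);
  }

  @Override
  public Task<?> processCurrentTask(MapRedTask currTask, ConditionalTask conditionalTask, Context context)
      throws SemanticException {
    // conditionalTask is non-null only when currTask was pulled out of a ConditionalTask's list
    System.out.println("Inspecting " + currTask.getId()
        + (conditionalTask == null ? "" : " (inside conditional " + conditionalTask.getId() + ")"));
    // returning the task unchanged leaves the plan as it was
    return currTask;
  }
}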
use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.
the class GenMapRedUtils method createCondTask.
/**
* Construct a conditional task given the current leaf task, the MoveWork and the MapredWork.
*
* @param conf
* HiveConf
* @param currTask
* current leaf task
* @param dummyMoveWork
* MoveWork for the move task
* @param mergeWork
* MapredWork for the merge task.
* @param condInputPath
* the input directory of the merge/move task
* @param condOutputPath
* the output directory of the merge/move task
* @param moveTaskToLink
* a MoveTask that may be linked to the conditional sub-tasks
* @param dependencyTask
* a dependency task that may be linked to the conditional sub-tasks
* @return The conditional task
*/
private static ConditionalTask createCondTask(HiveConf conf, Task<? extends Serializable> currTask,
    MoveWork dummyMoveWork, Serializable mergeWork, Path condInputPath, Path condOutputPath,
    Task<MoveWork> moveTaskToLink, DependencyCollectionTask dependencyTask) {
  boolean shouldMergeMovePaths = (moveTaskToLink != null && dependencyTask == null
      && shouldMergeMovePaths(conf, condInputPath, condOutputPath, moveTaskToLink.getWork()));
  MoveWork workForMoveOnlyTask;
  if (shouldMergeMovePaths) {
    workForMoveOnlyTask = mergeMovePaths(condInputPath, moveTaskToLink.getWork());
  } else {
    workForMoveOnlyTask = dummyMoveWork;
  }
  // There are 3 options for this ConditionalTask:
  // 1) Merge the partitions
  // 2) Move the partitions (i.e. don't merge the partitions)
  // 3) Merge some partitions and move other partitions (i.e. merge some partitions and don't
  //    merge others); in this case the merge is done first followed by the move to prevent
  //    conflicts.
  Task<? extends Serializable> mergeOnlyMergeTask = TaskFactory.get(mergeWork, conf);
  Task<? extends Serializable> moveOnlyMoveTask = TaskFactory.get(workForMoveOnlyTask, conf);
  Task<? extends Serializable> mergeAndMoveMergeTask = TaskFactory.get(mergeWork, conf);
  Task<? extends Serializable> mergeAndMoveMoveTask = TaskFactory.get(dummyMoveWork, conf);
  // NOTE! It is necessary that the merge task is the parent of the move task, and not
  // the other way around, for the ConditionalTask's execute method to run correctly.
  mergeAndMoveMergeTask.addDependentTask(mergeAndMoveMoveTask);
  List<Serializable> listWorks = new ArrayList<Serializable>();
  listWorks.add(workForMoveOnlyTask);
  listWorks.add(mergeWork);
  ConditionalWork cndWork = new ConditionalWork(listWorks);
  List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
  listTasks.add(moveOnlyMoveTask);
  listTasks.add(mergeOnlyMergeTask);
  listTasks.add(mergeAndMoveMergeTask);
  ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, conf);
  cndTsk.setListTasks(listTasks);
  // create resolver
  cndTsk.setResolver(new ConditionalResolverMergeFiles());
  ConditionalResolverMergeFilesCtx mrCtx =
      new ConditionalResolverMergeFilesCtx(listTasks, condInputPath.toString());
  cndTsk.setResolverCtx(mrCtx);
  // make the conditional task as the child of the current leaf task
  currTask.addDependentTask(cndTsk);
  if (shouldMergeMovePaths) {
    // If a new MoveWork was created, then we should link all dependent tasks from the MoveWork to link.
    if (moveTaskToLink.getDependentTasks() != null) {
      for (Task dependentTask : moveTaskToLink.getDependentTasks()) {
        moveOnlyMoveTask.addDependentTask(dependentTask);
      }
    }
  } else {
    addDependentMoveTasks(moveTaskToLink, conf, moveOnlyMoveTask, dependencyTask);
  }
  addDependentMoveTasks(moveTaskToLink, conf, mergeOnlyMergeTask, dependencyTask);
  addDependentMoveTasks(moveTaskToLink, conf, mergeAndMoveMoveTask, dependencyTask);
  return cndTsk;
}
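Stripped of the merge/move specifics, the code above follows the generic recipe for building a ConditionalTask: wrap the candidate works in a ConditionalWork, create the candidate tasks, then attach a resolver plus resolver context that pick among them at execution time. Below is a condensed sketch of that recipe using only the calls already shown above; buildConditional and its parameters are hypothetical placeholders, not Hive methods.

private static ConditionalTask buildConditional(HiveConf conf, Task<? extends Serializable> parentTask,
    MoveWork moveWork, Serializable mergeWork, Path inputDir) {
  List<Serializable> works = new ArrayList<Serializable>();
  works.add(moveWork);
  works.add(mergeWork);
  ConditionalWork condWork = new ConditionalWork(works);

  List<Task<? extends Serializable>> tasks = new ArrayList<Task<? extends Serializable>>();
  tasks.add(TaskFactory.get(moveWork, conf));
  tasks.add(TaskFactory.get(mergeWork, conf));

  ConditionalTask condTask = (ConditionalTask) TaskFactory.get(condWork, conf);
  condTask.setListTasks(tasks);
  // the resolver decides at run time which of the list tasks actually execute
  condTask.setResolver(new ConditionalResolverMergeFiles());
  condTask.setResolverCtx(new ConditionalResolverMergeFilesCtx(tasks, inputDir.toString()));
  // hang the conditional under the current leaf so it runs after it
  parentTask.addDependentTask(condTask);
  return condTask;
}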
use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.
the class SparkCompiler method setInputFormat.
@Override
protected void setInputFormat(Task<? extends Serializable> task) {
  if (task instanceof SparkTask) {
    SparkWork work = ((SparkTask) task).getWork();
    List<BaseWork> all = work.getAllWork();
    for (BaseWork w : all) {
      if (w instanceof MapWork) {
        MapWork mapWork = (MapWork) w;
        HashMap<String, Operator<? extends OperatorDesc>> opMap = mapWork.getAliasToWork();
        if (!opMap.isEmpty()) {
          for (Operator<? extends OperatorDesc> op : opMap.values()) {
            setInputFormat(mapWork, op);
          }
        }
      }
    }
  } else if (task instanceof ConditionalTask) {
    List<Task<? extends Serializable>> listTasks = ((ConditionalTask) task).getListTasks();
    for (Task<? extends Serializable> tsk : listTasks) {
      setInputFormat(tsk);
    }
  }
  if (task.getChildTasks() != null) {
    for (Task<? extends Serializable> childTask : task.getChildTasks()) {
      setInputFormat(childTask);
    }
  }
}
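Note that the list tasks of a ConditionalTask are not reachable through getChildTasks(), which is why the method needs both the explicit getListTasks() branch and the separate child-task loop. The sketch below factors that full traversal into a reusable walk, assuming only the Task and ConditionalTask APIs used above; TaskTreeWalker is illustrative, not part of Hive, and the visited set is an added guard against re-processing tasks that appear as children of several parents.

import java.util.HashSet;
import java.util.Set;
import java.util.function.Consumer;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.Task;

public final class TaskTreeWalker {

  private TaskTreeWalker() {
  }

  /** Applies the visitor to every task reachable via conditional list tasks and child tasks. */
  public static void walk(Task<?> task, Set<Task<?>> visited, Consumer<Task<?>> visitor) {
    if (task == null || !visited.add(task)) {
      return;
    }
    visitor.accept(task);
    if (task instanceof ConditionalTask) {
      // candidate branches of a conditional are only reachable via getListTasks()
      for (Task<?> listTask : ((ConditionalTask) task).getListTasks()) {
        walk(listTask, visited, visitor);
      }
    }
    if (task.getChildTasks() != null) {
      for (Task<?> child : task.getChildTasks()) {
        walk(child, visited, visitor);
      }
    }
  }
}

A caller would seed the walk with the plan's root tasks and an empty HashSet, and supply a visitor that handles whatever task types it cares about; the walk itself takes care of conditional branches and children.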
use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.
the class Driver method launchTask.
/**
* Launches a new task
*
* @param tsk
* task being launched
* @param queryId
* Id of the query containing the task
* @param noName
* whether the job name is unset (if so, one is generated from the task id)
* @param jobname
* name of the task, if it is a map-reduce job
* @param jobs
* number of map-reduce jobs
* @param cxt
* the driver context
*/
private TaskRunner launchTask(Task<? extends Serializable> tsk, String queryId, boolean noName,
    String jobname, int jobs, DriverContext cxt) throws HiveException {
  if (SessionState.get() != null) {
    SessionState.get().getHiveHistory().startTask(queryId, tsk, tsk.getClass().getName());
  }
  if (tsk.isMapRedTask() && !(tsk instanceof ConditionalTask)) {
    if (noName) {
      conf.set(MRJobConfig.JOB_NAME, jobname + " (" + tsk.getId() + ")");
    }
    conf.set(DagUtils.MAPREDUCE_WORKFLOW_NODE_NAME, tsk.getId());
    Utilities.setWorkflowAdjacencies(conf, plan);
    cxt.incCurJobNo(1);
    console.printInfo("Launching Job " + cxt.getCurJobNo() + " out of " + jobs);
  }
  tsk.initialize(queryState, plan, cxt, ctx.getOpContext());
  TaskRunner tskRun = new TaskRunner(tsk);
  cxt.launching(tskRun);
  // Launch Task
  if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) && tsk.canExecuteInParallel()) {
    // Launch it in the parallel mode, as a separate thread only for MR tasks
    if (LOG.isInfoEnabled()) {
      LOG.info("Starting task [" + tsk + "] in parallel");
    }
    tskRun.start();
  } else {
    if (LOG.isInfoEnabled()) {
      LOG.info("Starting task [" + tsk + "] in serial mode");
    }
    tskRun.runSequential();
  }
  return tskRun;
}
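Two details in this method hinge on ConditionalTask. First, a ConditionalTask typically reports isMapRedTask() as true when any of its list tasks is a map-reduce task, so the extra instanceof check keeps the wrapper itself from being counted and named as a job; the chosen list tasks are launched, and counted, later in their own right. Second, parallel launch requires both hive.exec.parallel (HiveConf.ConfVars.EXECPARALLEL) and the task's own canExecuteInParallel(). The sketch below pulls both decisions into one place, using only the calls shown above; LaunchDecisions and describeLaunch are hypothetical names, not part of Hive.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.Task;

public final class LaunchDecisions {

  private LaunchDecisions() {
  }

  /** Summarizes how the driver would treat this task when launching it. */
  public static String describeLaunch(HiveConf conf, Task<?> tsk) {
    // the conditional wrapper launches no MapReduce job itself, hence the instanceof check
    boolean countsAsJob = tsk.isMapRedTask() && !(tsk instanceof ConditionalTask);
    // hive.exec.parallel must be enabled and the task must opt in to run on its own thread
    boolean parallel = HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL)
        && tsk.canExecuteInParallel();
    return tsk.getId() + ": countsAsJob=" + countsAsJob + ", parallel=" + parallel;
  }
}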