Search in sources :

Example 16 with ConditionalTask

use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.

the class SparkCrossProductCheck method dispatch.

/**
 * Visits one node of the task graph and runs this class's join checks on it.
 * <p>
 * A {@code SparkTask} has {@code checkShuffleJoin} applied to its work and
 * {@code checkMapJoin} applied to the task itself. A {@code ConditionalTask} is
 * expanded: every task in its task list is dispatched recursively with the same
 * stack and node outputs. Any other kind of task is ignored.
 *
 * @param nd          the node being visited; cast to {@code Task} without a type check
 * @param stack       the walker stack, forwarded unchanged on recursion
 * @param nodeOutputs the walker outputs, forwarded unchanged on recursion
 * @return always {@code null}
 * @throws SemanticException declared by the dispatcher contract
 */
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
    final Task<?> visited = (Task<?>) nd;
    if (visited instanceof SparkTask) {
        final SparkTask sparkTask = (SparkTask) visited;
        SparkWork work = sparkTask.getWork();
        checkShuffleJoin(work);
        checkMapJoin(sparkTask);
        return null;
    }
    if (visited instanceof ConditionalTask) {
        // A conditional task only wraps candidates; inspect each candidate in turn.
        for (Task<?> candidate : ((ConditionalTask) visited).getListTasks()) {
            dispatch(candidate, stack, nodeOutputs);
        }
    }
    return null;
}
Also used : SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ArrayList(java.util.ArrayList) List(java.util.List) SparkWork(org.apache.hadoop.hive.ql.plan.SparkWork)

Example 17 with ConditionalTask

use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.

the class AbstractJoinTaskDispatcher method dispatch.

/**
 * Visits one node of the task graph and hands every map-reduce task it finds to
 * {@code processCurrentTask}, queueing the returned task on the walker context.
 * <p>
 * Nodes whose {@code isMapRedTask()} is false are skipped. For a
 * {@code ConditionalTask}, each task in its task list that reports
 * {@code isMapRedTask()} is processed with the conditional task passed along;
 * any other map-reduce node is processed on its own with a {@code null}
 * conditional task.
 *
 * @param nd          the node being visited; cast to {@code Task} without a type check
 * @param stack       the walker stack (not used here)
 * @param nodeOutputs walker outputs; element 0 must be the {@code TaskGraphWalkerContext}
 * @return always {@code null}
 * @throws SemanticException if {@code nodeOutputs} is {@code null} or empty
 */
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
    final boolean hasContext = nodeOutputs != null && nodeOutputs.length != 0;
    if (!hasContext) {
        throw new SemanticException("No Dispatch Context");
    }
    final TaskGraphWalkerContext dispatchCtx = (TaskGraphWalkerContext) nodeOutputs[0];
    final Task<?> visited = (Task<?>) nd;

    // Nothing to do for nodes that are not map-reduce work.
    if (!visited.isMapRedTask()) {
        return null;
    }

    // A plain map-reduce task is processed directly, with no owning conditional task.
    if (!(visited instanceof ConditionalTask)) {
        Task<?> replacement = this.processCurrentTask((MapRedTask) visited, null, physicalContext.getContext());
        dispatchCtx.addToDispatchList(replacement);
        return null;
    }

    // A conditional task: process each of its map-reduce candidates.
    for (Task<?> candidate : ((ConditionalTask) visited).getListTasks()) {
        if (!candidate.isMapRedTask()) {
            continue;
        }
        Task<?> replacement = this.processCurrentTask((MapRedTask) candidate, ((ConditionalTask) visited), physicalContext.getContext());
        dispatchCtx.addToDispatchList(replacement);
    }
    return null;
}
Also used : TaskGraphWalkerContext(org.apache.hadoop.hive.ql.lib.TaskGraphWalker.TaskGraphWalkerContext) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 18 with ConditionalTask

use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.

the class GenMapRedUtils method createCondTask.

/**
 * Construct a conditional task given the current leaf task, the MoveWork and the MapredWork.
 * <p>
 * Four sub-tasks are created through {@code TaskFactory.get}: a move-only task, a merge-only
 * task, and a merge task with a move task added as its dependent. The first three are set as
 * the conditional task's list tasks, a {@code ConditionalResolverMergeFiles} is installed as
 * its resolver, and the conditional task is added as a dependent of {@code currTask}.
 *
 * @param conf
 *          HiveConf, passed to every {@code TaskFactory.get} call and to the helpers
 * @param currTask
 *          current leaf task; the new conditional task becomes its dependent
 * @param dummyMoveWork
 *          MoveWork for the move task
 * @param mergeWork
 *          MapredWork for the merge task (declared here only as {@code Serializable})
 * @param condInputPath
 *          the input directory of the merge/move task
 * @param condOutputPath
 *          the output directory of the merge/move task
 * @param moveTaskToLink
 *          a MoveTask that may be linked to the conditional sub-tasks; checked against
 *          {@code null} before its work is inspected
 * @param dependencyTask
 *          a dependency task that may be linked to the conditional sub-tasks; move paths
 *          are only merged when this is {@code null}
 * @return The conditional task
 */
private static ConditionalTask createCondTask(HiveConf conf, Task<? extends Serializable> currTask, MoveWork dummyMoveWork, Serializable mergeWork, Path condInputPath, Path condOutputPath, Task<MoveWork> moveTaskToLink, DependencyCollectionTask dependencyTask) {
    // Merging the move paths is only considered when there is a move task to link and no
    // dependency task; the final decision is delegated to the shouldMergeMovePaths helper.
    boolean shouldMergeMovePaths = (moveTaskToLink != null && dependencyTask == null && shouldMergeMovePaths(conf, condInputPath, condOutputPath, moveTaskToLink.getWork()));
    // Work used by the move-only branch: either a merged MoveWork built from the linked
    // move task, or the caller-supplied dummy MoveWork.
    MoveWork workForMoveOnlyTask;
    if (shouldMergeMovePaths) {
        workForMoveOnlyTask = mergeMovePaths(condInputPath, moveTaskToLink.getWork());
    } else {
        workForMoveOnlyTask = dummyMoveWork;
    }
    // There are 3 options for this ConditionalTask:
    // 1) Merge the partitions
    // 2) Move the partitions (i.e. don't merge the partitions)
    // 3) Merge some partitions and move other partitions (i.e. merge some partitions and don't
    // merge others) in this case the merge is done first followed by the move to prevent
    // conflicts.
    Task<? extends Serializable> mergeOnlyMergeTask = TaskFactory.get(mergeWork, conf);
    Task<? extends Serializable> moveOnlyMoveTask = TaskFactory.get(workForMoveOnlyTask, conf);
    Task<? extends Serializable> mergeAndMoveMergeTask = TaskFactory.get(mergeWork, conf);
    // The move half of option 3 always uses dummyMoveWork, never the merged MoveWork.
    Task<? extends Serializable> mergeAndMoveMoveTask = TaskFactory.get(dummyMoveWork, conf);
    // NOTE! It is necessary merge task is the parent of the move task, and not
    // the other way around, for the proper execution of the execute method of
    // ConditionalTask
    mergeAndMoveMergeTask.addDependentTask(mergeAndMoveMoveTask);
    // NOTE(review): only two works are listed here while three tasks are listed below;
    // presumably intentional because option 3 reuses the same works -- confirm.
    List<Serializable> listWorks = new ArrayList<Serializable>();
    listWorks.add(workForMoveOnlyTask);
    listWorks.add(mergeWork);
    ConditionalWork cndWork = new ConditionalWork(listWorks);
    // Candidate tasks in the order: move-only, merge-only, merge-and-move.
    // NOTE(review): the resolver presumably relies on this order -- confirm before reordering.
    List<Task<? extends Serializable>> listTasks = new ArrayList<Task<? extends Serializable>>();
    listTasks.add(moveOnlyMoveTask);
    listTasks.add(mergeOnlyMergeTask);
    listTasks.add(mergeAndMoveMergeTask);
    ConditionalTask cndTsk = (ConditionalTask) TaskFactory.get(cndWork, conf);
    cndTsk.setListTasks(listTasks);
    // create resolver; its context gets the candidate tasks and the input directory as a string
    cndTsk.setResolver(new ConditionalResolverMergeFiles());
    ConditionalResolverMergeFilesCtx mrCtx = new ConditionalResolverMergeFilesCtx(listTasks, condInputPath.toString());
    cndTsk.setResolverCtx(mrCtx);
    // make the conditional task as the child of the current leaf task
    currTask.addDependentTask(cndTsk);
    if (shouldMergeMovePaths) {
        // If a new MoveWork was created, then we should link all dependent tasks from the MoveWork to link.
        // moveTaskToLink is non-null here because shouldMergeMovePaths requires it.
        if (moveTaskToLink.getDependentTasks() != null) {
            // NOTE(review): raw Task type in this loop; left unchanged.
            for (Task dependentTask : moveTaskToLink.getDependentTasks()) {
                moveOnlyMoveTask.addDependentTask(dependentTask);
            }
        }
    } else {
        // No merged MoveWork: link the move-only task through the shared helper instead.
        addDependentMoveTasks(moveTaskToLink, conf, moveOnlyMoveTask, dependencyTask);
    }
    // The merge-only task and the trailing move of option 3 are always linked via the helper.
    addDependentMoveTasks(moveTaskToLink, conf, mergeOnlyMergeTask, dependencyTask);
    addDependentMoveTasks(moveTaskToLink, conf, mergeAndMoveMoveTask, dependencyTask);
    return cndTsk;
}
Also used : MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) ConditionalResolverMergeFiles(org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles) Serializable(java.io.Serializable) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) DependencyCollectionTask(org.apache.hadoop.hive.ql.exec.DependencyCollectionTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) ArrayList(java.util.ArrayList) ConditionalWork(org.apache.hadoop.hive.ql.plan.ConditionalWork) ConditionalResolverMergeFilesCtx(org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles.ConditionalResolverMergeFilesCtx)

Example 19 with ConditionalTask

use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.

the class SparkCompiler method setInputFormat.

/**
 * Walks a task and everything reachable from it, applying the two-argument
 * {@code setInputFormat(MapWork, Operator)} overload to every operator
 * registered in a {@code MapWork}'s alias-to-work map.
 * <p>
 * For a {@code SparkTask}, each {@code MapWork} among the works of its
 * {@code SparkWork} is visited. For a {@code ConditionalTask}, the method recurses
 * into each task of its task list. In every case the method then recurses into
 * the task's child tasks, if there are any.
 *
 * @param task the root of the task sub-graph to process
 */
@Override
protected void setInputFormat(Task<? extends Serializable> task) {
    if (task instanceof SparkTask) {
        for (BaseWork unit : ((SparkTask) task).getWork().getAllWork()) {
            if (!(unit instanceof MapWork)) {
                continue;
            }
            MapWork mapWork = (MapWork) unit;
            HashMap<String, Operator<? extends OperatorDesc>> operatorsByAlias = mapWork.getAliasToWork();
            if (operatorsByAlias.isEmpty()) {
                continue;
            }
            for (Operator<? extends OperatorDesc> root : operatorsByAlias.values()) {
                setInputFormat(mapWork, root);
            }
        }
    } else if (task instanceof ConditionalTask) {
        // Descend into every candidate task of the conditional task.
        for (Task<? extends Serializable> candidate : ((ConditionalTask) task).getListTasks()) {
            setInputFormat(candidate);
        }
    }

    // Children are visited regardless of the kind of task handled above.
    if (task.getChildTasks() == null) {
        return;
    }
    for (Task<? extends Serializable> child : task.getChildTasks()) {
        setInputFormat(child);
    }
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) Serializable(java.io.Serializable) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) SparkWork(org.apache.hadoop.hive.ql.plan.SparkWork) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) List(java.util.List) ArrayList(java.util.ArrayList) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Example 20 with ConditionalTask

use of org.apache.hadoop.hive.ql.exec.ConditionalTask in project hive by apache.

the class Driver method launchTask.

/**
 * Initializes a task, wraps it in a {@code TaskRunner}, registers the runner with
 * the driver context and starts it.
 * <p>
 * When a session state exists, the task start is recorded in its Hive history.
 * For a map-reduce task that is not a {@code ConditionalTask}, the job
 * configuration is updated first (job name, workflow node name, workflow
 * adjacencies), the context's current job number is incremented and a
 * "Launching Job" line is printed to the console.
 *
 * @param tsk
 *          task being launched
 * @param queryId
 *          Id of the query containing the task
 * @param noName
 *          when {@code true}, the job name in the configuration is set to
 *          {@code jobname} followed by the task id in parentheses
 * @param jobname
 *          name of the task, if it is a map-reduce job
 * @param jobs
 *          number of map-reduce jobs, used only in the console message
 * @param cxt
 *          the driver context
 * @return the runner that was created for {@code tsk}
 * @throws HiveException declared by this method; the throwing call is not visible here
 */
private TaskRunner launchTask(Task<? extends Serializable> tsk, String queryId, boolean noName, String jobname, int jobs, DriverContext cxt) throws HiveException {
    final SessionState session = SessionState.get();
    if (session != null) {
        session.getHiveHistory().startTask(queryId, tsk, tsk.getClass().getName());
    }

    final boolean launchesJob = tsk.isMapRedTask() && !(tsk instanceof ConditionalTask);
    if (launchesJob) {
        if (noName) {
            conf.set(MRJobConfig.JOB_NAME, jobname + " (" + tsk.getId() + ")");
        }
        conf.set(DagUtils.MAPREDUCE_WORKFLOW_NODE_NAME, tsk.getId());
        Utilities.setWorkflowAdjacencies(conf, plan);
        cxt.incCurJobNo(1);
        console.printInfo("Launching Job " + cxt.getCurJobNo() + " out of " + jobs);
    }

    tsk.initialize(queryState, plan, cxt, ctx.getOpContext());
    final TaskRunner runner = new TaskRunner(tsk);
    cxt.launching(runner);

    // Parallel launch needs both the configuration switch and the task's consent;
    // in that mode the runner is started as a separate thread.
    final boolean runInParallel = HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) && tsk.canExecuteInParallel();
    if (!runInParallel) {
        if (LOG.isInfoEnabled()) {
            LOG.info("Starting task [" + tsk + "] in serial mode");
        }
        runner.runSequential();
        return runner;
    }
    if (LOG.isInfoEnabled()) {
        LOG.info("Starting task [" + tsk + "] in parallel");
    }
    runner.start();
    return runner;
}
Also used : ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) TaskRunner(org.apache.hadoop.hive.ql.exec.TaskRunner)

Aggregations

ConditionalTask (org.apache.hadoop.hive.ql.exec.ConditionalTask)29 Task (org.apache.hadoop.hive.ql.exec.Task)24 ArrayList (java.util.ArrayList)19 Serializable (java.io.Serializable)16 List (java.util.List)15 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)14 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)13 MapRedTask (org.apache.hadoop.hive.ql.exec.mr.MapRedTask)13 Path (org.apache.hadoop.fs.Path)11 Operator (org.apache.hadoop.hive.ql.exec.Operator)11 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)11 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)10 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)9 ConditionalWork (org.apache.hadoop.hive.ql.plan.ConditionalWork)9 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)8 SparkTask (org.apache.hadoop.hive.ql.exec.spark.SparkTask)8 MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork)8 SparkWork (org.apache.hadoop.hive.ql.plan.SparkWork)8 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)7 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)7