
Example 1 with MapRedTask

Use of org.apache.hadoop.hive.ql.exec.mr.MapRedTask in project hive by apache.

In the class CrossProductCheck, the dispatch method:

@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
    @SuppressWarnings("unchecked") Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
    if (currTask instanceof MapRedTask) {
        // plain MapReduce task: inspect its map joins and its reducer for cross products
        MapRedTask mrTsk = (MapRedTask) currTask;
        MapredWork mrWrk = mrTsk.getWork();
        checkMapJoins(mrTsk);
        checkMRReducer(currTask.toString(), mrWrk);
    } else if (currTask instanceof ConditionalTask) {
        // conditional task: recurse into each of its candidate sub-tasks
        List<Task<? extends Serializable>> taskListInConditionalTask = ((ConditionalTask) currTask).getListTasks();
        for (Task<? extends Serializable> tsk : taskListInConditionalTask) {
            dispatch(tsk, stack, nodeOutputs);
        }
    } else if (currTask instanceof TezTask) {
        // Tez task: run the equivalent checks over the Tez work graph
        TezTask tzTask = (TezTask) currTask;
        TezWork tzWrk = tzTask.getWork();
        checkMapJoins(tzWrk);
        checkTezReducer(tzWrk);
    }
    return null;
}
Also used: MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask), ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask), TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask), Task(org.apache.hadoop.hive.ql.exec.Task), Serializable(java.io.Serializable), MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork), ArrayList(java.util.ArrayList), List(java.util.List), TezWork(org.apache.hadoop.hive.ql.plan.TezWork)
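
A dispatcher like this is normally driven by Hive's task-graph walker rather than called directly. The sketch below shows that wiring under a few assumptions: that CrossProductCheck implements the Dispatcher interface (as its dispatch signature suggests) and that the physical context exposes the plan's root tasks via getRootTasks(); treat it as illustrative, not the exact Hive source.

import java.util.ArrayList;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.TaskGraphWalker;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// illustrative sketch: walk every task in the plan and let dispatch() examine it
public static void checkForCrossProducts(PhysicalContext pctx) throws SemanticException {
    Dispatcher dispatcher = new CrossProductCheck();          // the dispatcher shown above
    TaskGraphWalker walker = new TaskGraphWalker(dispatcher); // visits the task DAG
    ArrayList<Node> rootTasks = new ArrayList<Node>(pctx.getRootTasks());
    walker.startWalking(rootTasks, null);                     // dispatch() runs once per task
}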

Example 2 with MapRedTask

Use of org.apache.hadoop.hive.ql.exec.mr.MapRedTask in project hive by apache.

In the class DriverContext, the finished method:

public void finished(TaskRunner runner) {
    // only finished MapReduce tasks feed stats tasks; ignore everything else
    if (statsTasks.isEmpty() || !(runner.getTask() instanceof MapRedTask)) {
        return;
    }
    MapRedTask mapredTask = (MapRedTask) runner.getTask();
    MapWork mapWork = mapredTask.getWork().getMapWork();
    ReduceWork reduceWork = mapredTask.getWork().getReduceWork();
    // collect the operators of the finished task: the map-side roots plus the reducer, if any
    List<Operator> operators = new ArrayList<Operator>(mapWork.getAliasToWork().values());
    if (reduceWork != null) {
        operators.add(reduceWork.getReducer());
    }
    // find the stats prefixes of all file sinks that gather statistics
    final List<String> statKeys = new ArrayList<String>(1);
    NodeUtils.iterate(operators, FileSinkOperator.class, new Function<FileSinkOperator>() {

        @Override
        public void apply(FileSinkOperator fsOp) {
            if (fsOp.getConf().isGatherStats()) {
                statKeys.add(fsOp.getConf().getStatsAggPrefix());
            }
        }
    });
    // point the matching stats tasks at the task that just finished
    for (String statKey : statKeys) {
        statsTasks.get(statKey).getWork().setSourceTask(mapredTask);
    }
}
Also used: MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask), FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator), Operator(org.apache.hadoop.hive.ql.exec.Operator), MapWork(org.apache.hadoop.hive.ql.plan.MapWork), ArrayList(java.util.ArrayList), ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork)
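
The anonymous class passed to NodeUtils.iterate overrides a single apply method, so on Java 8+ the traversal step can be written as a lambda, assuming Hive's Function here is a single-abstract-method interface (which its use above suggests). A minimal equivalent:

// same traversal as above, written as a lambda (illustrative; requires Java 8+)
NodeUtils.iterate(operators, FileSinkOperator.class, fsOp -> {
    if (fsOp.getConf().isGatherStats()) {
        // remember the aggregation prefix of every file sink that gathers stats
        statKeys.add(fsOp.getConf().getStatsAggPrefix());
    }
});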

Example 3 with MapRedTask

Use of org.apache.hadoop.hive.ql.exec.mr.MapRedTask in project hive by apache.

In the class GenMapRedUtils, the joinPlan method:

/**
 * Merge the current task into the old task for the reducer
 *
 * @param currTask
 *          the current task for the current reducer
 * @param oldTask
 *          the old task for the current reducer
 * @param opProcCtx
 *          processing context
 */
public static void joinPlan(Task<? extends Serializable> currTask, Task<? extends Serializable> oldTask, GenMRProcContext opProcCtx) throws SemanticException {
    assert currTask != null && oldTask != null;
    TableScanOperator currTopOp = opProcCtx.getCurrTopOp();
    List<Task<? extends Serializable>> parTasks = null;
    // detach the current task from its parent tasks; the old task is re-attached to them below
    if (currTask.getParentTasks() != null && !currTask.getParentTasks().isEmpty()) {
        parTasks = new ArrayList<Task<? extends Serializable>>();
        parTasks.addAll(currTask.getParentTasks());
        Object[] parTaskArr = parTasks.toArray();
        for (Object element : parTaskArr) {
            ((Task<? extends Serializable>) element).removeDependentTask(currTask);
        }
    }
    if (currTopOp != null) {
        mergeInput(currTopOp, opProcCtx, oldTask, false);
    }
    if (parTasks != null) {
        for (Task<? extends Serializable> parTask : parTasks) {
            parTask.addDependentTask(oldTask);
        }
    }
    if (oldTask instanceof MapRedTask && currTask instanceof MapRedTask) {
        ((MapRedTask) currTask).getWork().getMapWork().mergingInto(((MapRedTask) oldTask).getWork().getMapWork());
    }
    opProcCtx.setCurrTopOp(null);
    opProcCtx.setCurrTask(oldTask);
}
Also used: MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask), TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator), SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask), ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask), Task(org.apache.hadoop.hive.ql.exec.Task), MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask), DependencyCollectionTask(org.apache.hadoop.hive.ql.exec.DependencyCollectionTask), Serializable(java.io.Serializable)
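
The re-wiring above relies on Task's parent/dependent bookkeeping. The fragment below is a minimal sketch, not taken from the Hive source, of what removeDependentTask and addDependentTask do to the task DAG; obtaining standalone tasks through TaskFactory.get(new MapredWork()) is an assumption made purely for illustration.

// illustrative only: detach currTask from its parent and attach oldTask in its place
Task<? extends Serializable> parent = TaskFactory.get(new MapredWork());
Task<? extends Serializable> currTask = TaskFactory.get(new MapredWork());
Task<? extends Serializable> oldTask = TaskFactory.get(new MapredWork());
parent.addDependentTask(currTask);    // parent -> currTask
// ... later, when currTask's reducer is merged into oldTask:
parent.removeDependentTask(currTask); // drop the edge parent -> currTask
parent.addDependentTask(oldTask);     // and route the parent to oldTask instead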

Example 4 with MapRedTask

Use of org.apache.hadoop.hive.ql.exec.mr.MapRedTask in project hive by apache.

In the class CommonJoinTaskDispatcher, the convertTaskToMapJoinTask method:

// create a map-join task for this work, treating the table at bigTablePosition as the big (streamed) table
private MapRedTask convertTaskToMapJoinTask(MapredWork newWork, int bigTablePosition) throws SemanticException {
    // create a mapred task for this work
    MapRedTask newTask = (MapRedTask) TaskFactory.get(newWork);
    JoinOperator newJoinOp = getJoinOp(newTask);
    // optimize this newWork given the big table position
    MapJoinProcessor.genMapJoinOpAndLocalWork(physicalContext.getParseContext().getConf(), newWork, newJoinOp, bigTablePosition);
    return newTask;
}
Also used: MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask), JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator)
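
In the dispatcher this helper belongs to, the conversion is typically applied once per candidate big table, producing the map-join alternatives that a conditional task later chooses between at runtime. The following is a hypothetical sketch of that loop, not the actual CommonJoinTaskDispatcher code; cloneMapredWork and the candidate list are placeholders introduced only for illustration.

// hypothetical driver: build one map-join variant of the work per candidate big-table position
private List<MapRedTask> buildMapJoinVariants(MapredWork originalWork,
        List<Integer> candidateBigTablePositions) throws SemanticException {
    List<MapRedTask> variants = new ArrayList<MapRedTask>();
    for (int bigTablePosition : candidateBigTablePositions) {
        MapredWork clonedWork = cloneMapredWork(originalWork); // placeholder for a deep copy of the plan
        variants.add(convertTaskToMapJoinTask(clonedWork, bigTablePosition));
    }
    return variants;
}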

Example 5 with MapRedTask

Use of org.apache.hadoop.hive.ql.exec.mr.MapRedTask in project hive by apache.

In the class AbstractJoinTaskDispatcher, the dispatch method:

@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
    if (nodeOutputs == null || nodeOutputs.length == 0) {
        throw new SemanticException("No Dispatch Context");
    }
    TaskGraphWalkerContext walkerCtx = (TaskGraphWalkerContext) nodeOutputs[0];
    Task<?> currTask = (Task<?>) nd;
    // skip tasks that are neither map-reduce tasks nor conditional tasks wrapping them
    if (currTask.isMapRedTask()) {
        if (currTask instanceof ConditionalTask) {
            // get the list of tasks wrapped by the conditional task
            List<Task<?>> taskList = ((ConditionalTask) currTask).getListTasks();
            for (Task<?> tsk : taskList) {
                if (tsk.isMapRedTask()) {
                    Task<?> newTask = this.processCurrentTask((MapRedTask) tsk, ((ConditionalTask) currTask), physicalContext.getContext());
                    walkerCtx.addToDispatchList(newTask);
                }
            }
        } else {
            Task<?> newTask = this.processCurrentTask((MapRedTask) currTask, null, physicalContext.getContext());
            walkerCtx.addToDispatchList(newTask);
        }
    }
    return null;
}
Also used: TaskGraphWalkerContext(org.apache.hadoop.hive.ql.lib.TaskGraphWalker.TaskGraphWalkerContext), ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask), Task(org.apache.hadoop.hive.ql.exec.Task), MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask), SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)
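
AbstractJoinTaskDispatcher leaves the actual rewrite to processCurrentTask, whose signature is inferred here from the call sites in dispatch() above. A minimal, purely illustrative subclass could look like the following; a real implementation (such as the common-join dispatcher) would rewrite the task into its map-join form instead of returning it unchanged.

// illustrative no-op subclass; the constructor and method signatures are inferred, not copied from Hive
public class NoOpJoinTaskDispatcher extends AbstractJoinTaskDispatcher {

    public NoOpJoinTaskDispatcher(PhysicalContext context) {
        super(context);
    }

    @Override
    public Task<?> processCurrentTask(MapRedTask currTask, ConditionalTask conditionalTask,
            Context context) throws SemanticException {
        // a real dispatcher would analyze the join in currTask here and possibly replace the task
        return currTask;
    }
}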

Aggregations

MapRedTask (org.apache.hadoop.hive.ql.exec.mr.MapRedTask): 21
Task (org.apache.hadoop.hive.ql.exec.Task): 9
MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 9
ArrayList (java.util.ArrayList): 8
ConditionalTask (org.apache.hadoop.hive.ql.exec.ConditionalTask): 8
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 8
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 8
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 7
List (java.util.List): 6
Operator (org.apache.hadoop.hive.ql.exec.Operator): 5
Serializable (java.io.Serializable): 4
Path (org.apache.hadoop.fs.Path): 4
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 4
IOException (java.io.IOException): 3
HashSet (java.util.HashSet): 3
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 3
TezTask (org.apache.hadoop.hive.ql.exec.tez.TezTask): 3
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 3
ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork): 3
HashMap (java.util.HashMap): 2