Search in sources :

Example 1 with MapredLocalTask

use of org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask in project hive by apache.

Class MoveTask, method inferTaskInformation:

/**
 * Climbs the chain of single-parent tasks above {@code ti.task} and copies onto {@code ti}
 * the bucketing / sorting details exposed by the first {@code MapRedTask} it reaches.
 *
 * <p>On every step {@code ti.task} is overwritten with the parent being examined. The climb
 * ends when the current task does not have exactly one parent, when a {@code MergeFileTask}
 * or {@code MapredLocalTask} is reached (nothing is inferred from those), or after a
 * {@code MapRedTask} has been inspected. A {@code MoveTask} met on the way only redirects
 * {@code ti.path} to the source path of its load-file work, and the climb continues.
 *
 * @param ti mutable holder; {@code task}, {@code path}, {@code bucketCols}, {@code sortCols}
 *           and {@code numBuckets} may all be updated in place
 */
private void inferTaskInformation(TaskInformation ti) {
    for (;;) {
        // Only a task with exactly one parent can be followed further up the chain.
        if (ti.task.getParentTasks() == null || ti.task.getParentTasks().size() != 1) {
            return;
        }
        ti.task = (Task) ti.task.getParentTasks().get(0);

        // A merge task or a local map reduce task ends the climb with nothing inferred.
        if (ti.task instanceof MergeFileTask || ti.task instanceof MapredLocalTask) {
            return;
        }

        // A standard map reduce task: look up what it recorded for the directory being moved.
        if (ti.task instanceof MapRedTask) {
            MapredWork mrWork = (MapredWork) ti.task.getWork();
            MapWork mapSide = mrWork.getMapWork();
            ti.bucketCols = mapSide.getBucketedColsByDirectory().get(ti.path);
            ti.sortCols = mapSide.getSortedColsByDirectory().get(ti.path);
            if (mrWork.getReduceWork() != null) {
                ti.numBuckets = mrWork.getReduceWork().getNumReduceTasks();
            }
            if (!(ti.bucketCols == null && ti.sortCols == null)) {
                // Columns were recorded for this directory, so the work is expected to be
                // flagged as the final map reduce stage.
                assert mrWork.isFinalMapRed();
            }
            return;
        }

        // An intermediate move task: keep climbing, but from now on look up the path the
        // files were moved from (the original note points at GenMRFileSink1 for this case).
        if (ti.task instanceof MoveTask) {
            MoveTask upstreamMove = (MoveTask) ti.task;
            if (upstreamMove.getWork().getLoadFileWork() != null) {
                ti.path = upstreamMove.getWork().getLoadFileWork().getSourcePath().toUri().toString();
            }
        }
    }
}
Also used : MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) MapredLocalTask(org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask) MergeFileTask(org.apache.hadoop.hive.ql.io.merge.MergeFileTask)

Example 2 with MapredLocalTask

use of org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask in project hive by apache.

Class HashTableLoader, method loadDirectly:

/**
 * Fills {@code mapJoinTables} by running the direct-fetch operators registered for
 * {@code joinOp} through a local task that feeds a temporary hash-table sink.
 *
 * <p>Returns without touching {@code mapJoinTables} when no direct-fetch operators are
 * registered for {@code joinOp}. Otherwise each slot whose corresponding sink parent is
 * non-null receives the table the sink produced at that index; finally the sink's own
 * table array is nulled out.
 *
 * @param mapJoinTables destination array, written by index
 * @param inputFileName passed unchanged to {@code MapredLocalTask.startForward}
 * @throws Exception propagated from the calls made here
 */
private void loadDirectly(MapJoinTableContainer[] mapJoinTables, String inputFileName) throws Exception {
    MapredLocalWork localWork = context.getLocalWork();
    List<Operator<?>> fetchOps = localWork.getDirectFetchOp().get(joinOp);
    boolean nothingToLoad = fetchOps == null || fetchOps.isEmpty();
    if (nothingToLoad) {
        return;
    }

    MapredLocalTask localTask = new MapredLocalTask(localWork, new JobConf(hconf), false);

    // Wire every direct-fetch operator into a throwaway sink that collects the hash tables.
    HashTableSinkOperator sink = new TemporaryHashSinkOperator(new CompilationOpContext(), desc);
    sink.setParentOperators(new ArrayList<Operator<? extends OperatorDesc>>(fetchOps));
    for (Operator<?> fetchOp : fetchOps) {
        if (fetchOp == null) {
            continue;
        }
        fetchOp.setChildOperators(Arrays.<Operator<? extends OperatorDesc>>asList(sink));
    }

    localTask.setExecContext(context);
    localTask.startForward(inputFileName);

    // Hand over the tables, skipping positions whose parent operator is null.
    MapJoinTableContainer[] loaded = sink.getMapJoinTables();
    int slot = 0;
    while (slot < sink.getNumParent()) {
        if (sink.getParentOperators().get(slot) != null) {
            mapJoinTables[slot] = loaded[slot];
        }
        slot++;
    }

    // Clear the sink's array so it no longer references the tables just handed over.
    Arrays.fill(loaded, null);
}
Also used : MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) VectorizationOperator(org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator) HashTableSinkOperator(org.apache.hadoop.hive.ql.exec.HashTableSinkOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) TemporaryHashSinkOperator(org.apache.hadoop.hive.ql.exec.TemporaryHashSinkOperator) MapredLocalTask(org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask) HashTableSinkOperator(org.apache.hadoop.hive.ql.exec.HashTableSinkOperator) TemporaryHashSinkOperator(org.apache.hadoop.hive.ql.exec.TemporaryHashSinkOperator) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) MapJoinTableContainer(org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer) JobConf(org.apache.hadoop.mapred.JobConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Aggregations

MapredLocalTask (org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask)2 CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext)1 HashTableSinkOperator (org.apache.hadoop.hive.ql.exec.HashTableSinkOperator)1 MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator)1 Operator (org.apache.hadoop.hive.ql.exec.Operator)1 TemporaryHashSinkOperator (org.apache.hadoop.hive.ql.exec.TemporaryHashSinkOperator)1 MapRedTask (org.apache.hadoop.hive.ql.exec.mr.MapRedTask)1 MapJoinTableContainer (org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer)1 VectorizationOperator (org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator)1 MergeFileTask (org.apache.hadoop.hive.ql.io.merge.MergeFileTask)1 MapWork (org.apache.hadoop.hive.ql.plan.MapWork)1 MapredLocalWork (org.apache.hadoop.hive.ql.plan.MapredLocalWork)1 MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork)1 OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc)1 JobConf (org.apache.hadoop.mapred.JobConf)1