Example 21 with MapredWork

use of org.apache.hadoop.hive.ql.plan.MapredWork in project SQLWindowing by hbutani.

the class MRExecutor method execute.

/*
 * Create a MapredWork object and an operator tree for
 * processing queries with table functions, then execute
 * the plan defined in the MapredWork using the Hive
 * runtime environment.
 */
@Override
public void execute(QueryDef qdef, WindowingShell wShell) throws WindowingException {
    deleteQueryOutputDir(qdef);
    MapredWork mr = PlanUtils.getMapRedWork();
    try {
        createOperatorTree(qdef, mr);
        executePlan(mr, wShell.getCfg());
    } catch (Exception e) {
        // SemanticException, IOException and the rest are all surfaced
        // uniformly as WindowingException
        throw new WindowingException(e);
    }
}
Also used : MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) WindowingException(com.sap.hadoop.windowing.WindowingException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) IOException(java.io.IOException)
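PlanUtils.getMapRedWork() hands back an essentially empty plan, so the interesting wiring happens inside createOperatorTree. Below is a minimal sketch of that wiring pattern, restricted to the MapredWork/MapWork accessors that appear in Examples 23 and 24 on this page (so it assumes the newer Hive API); the helper name, input path, table descriptor and scan operator are illustrative placeholders, not code from the SQLWindowing project.

import java.util.ArrayList;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;

// Hypothetical helper: attach one input directory and the root of its
// operator tree to an otherwise empty plan.
static void wireInput(MapredWork mr, Path inputDir, TableDesc tblDesc, TableScanOperator scanOp) {
    MapWork mapWork = mr.getMapWork();
    // dummy alias: just reuse the input path (cf. Example 23)
    String alias = inputDir.toString();
    ArrayList<String> aliases = new ArrayList<String>();
    aliases.add(alias);
    mapWork.addPathToAlias(inputDir, aliases);
    mapWork.addPathToPartitionInfo(inputDir, new PartitionDesc(tblDesc, null));
    mapWork.getAliasToWork().put(alias, scanOp);
}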

Example 22 with MapredWork

use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.

the class GenMRUnion1 method process.

/**
   * Union Operator encountered. Currently, the algorithm is pretty simple: if
   * all the sub-queries are map-only, don't do anything; otherwise, insert a
   * FileSink on top of all the sub-queries.
   *
   * This can be optimized later on.
   *
   * @param nd
   *          the union operator encountered
   * @param opProcCtx
   *          context
   */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
    UnionOperator union = (UnionOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    UnionProcContext uCtx = parseCtx.getUCtx();
    // Map-only subqueries can be optimized in the future to not write to a file
    Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
    if (union.getConf().isAllInputsInSameReducer()) {
        // All inputs of this UnionOperator are in the same Reducer.
        // We do not need to break the operator tree.
        mapCurrCtx.put((Operator<? extends OperatorDesc>) nd, new GenMapRedCtx(ctx.getCurrTask(), ctx.getCurrAliasId()));
        return null;
    }
    UnionParseContext uPrsCtx = uCtx.getUnionParseContext(union);
    assert uPrsCtx != null;
    ctx.setCurrUnionOp(union);
    // If every sub-query is map-only, no new map-reduce job is needed.
    if (uPrsCtx.allMapOnlySubQ()) {
        return processMapOnlyUnion(union, stack, ctx, uCtx);
    }
    Task<? extends Serializable> currTask = ctx.getCurrTask();
    int pos = UnionProcFactory.getPositionParent(union, stack);
    Task<? extends Serializable> uTask = null;
    MapredWork uPlan = null;
    // union is encountered for the first time
    GenMRUnionCtx uCtxTask = ctx.getUnionTask(union);
    if (uCtxTask == null) {
        uPlan = GenMapRedUtils.getMapRedWork(parseCtx);
        uTask = TaskFactory.get(uPlan, parseCtx.getConf());
        uCtxTask = new GenMRUnionCtx(uTask);
        ctx.setUnionTask(union, uCtxTask);
    } else {
        uTask = uCtxTask.getUTask();
    }
    // Copy into the current union task plan if the sub-query is map-only and a root task
    if (uPrsCtx.getMapOnlySubq(pos) && uPrsCtx.getRootTask(pos)) {
        processSubQueryUnionMerge(ctx, uCtxTask, union, stack);
        if (ctx.getRootTasks().contains(currTask)) {
            ctx.getRootTasks().remove(currTask);
        }
    } else {
        // This sub-query needs a map-reduce job, so create an intermediate
        // file between it and the union task, and promote the current task
        // to a root task if it qualifies.
        if (shouldBeRootTask(currTask) && !ctx.getRootTasks().contains(currTask) && (currTask.getParentTasks() == null || currTask.getParentTasks().isEmpty())) {
            ctx.getRootTasks().add(currTask);
        }
        processSubQueryUnionCreateIntermediate(union.getParentOperators().get(pos), union, uTask, ctx, uCtxTask);
        // the currAliasId and CurrTopOp is not valid any more
        ctx.setCurrAliasId(null);
        ctx.setCurrTopOp(null);
        ctx.getOpTaskMap().put(null, uTask);
    }
    ctx.setCurrTask(uTask);
    mapCurrCtx.put((Operator<? extends OperatorDesc>) nd, new GenMapRedCtx(ctx.getCurrTask(), null));
    return true;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) UnionProcContext(org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext) UnionParseContext(org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext) GenMRUnionCtx(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMRUnionCtx) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) GenMapRedCtx(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
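GenMRUnion1 is a NodeProcessor, so it only fires when a walk over the operator tree dispatches a UnionOperator to it. The following is a hedged sketch of that registration using Hive's rule-dispatch classes in org.apache.hadoop.hive.ql.lib; the rule name, the pattern, and the helper method are assumptions for illustration rather than the exact wiring in Hive's MapReduceCompiler.

import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext;
import org.apache.hadoop.hive.ql.optimizer.GenMRUnion1;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Walk the operator tree and let GenMRUnion1 handle every union it meets.
static void walkForUnions(Collection<Node> topNodes, GenMRProcContext ctx) throws SemanticException {
    Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
    // match any UnionOperator node (pattern is an assumption)
    rules.put(new RuleRegExp("R_union", UnionOperator.getOperatorName() + "%"), new GenMRUnion1());
    Dispatcher disp = new DefaultRuleDispatcher(null, rules, ctx);
    GraphWalker walker = new DefaultGraphWalker(disp);
    walker.startWalking(topNodes, null); // null: per-node outputs not collected
}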

Example 23 with MapredWork

use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.

the class GenMapRedUtils method createMRWorkForMergingFiles.

/**
   * Create a MapWork for merging files, based on the input path, the top
   * operator, and the input table descriptor.
   *
   * @param conf
   *          the Hive configuration
   * @param topOp
   *          the table scan operator that is the root of the MapReduce task
   * @param fsDesc
   *          the file sink descriptor that serves as the input to this merge
   *          task
   * @return the MapWork of the merge plan
   */
private static MapWork createMRWorkForMergingFiles(HiveConf conf, TableScanOperator topOp, FileSinkDesc fsDesc) {
    ArrayList<String> aliases = new ArrayList<String>();
    Path inputDir = StringInternUtils.internUriStringsInPath(fsDesc.getFinalDirName());
    String inputDirStr = inputDir.toString().intern();
    TableDesc tblDesc = fsDesc.getTableInfo();
    // dummy alias: just use the input path
    aliases.add(inputDirStr);
    // constructing the default MapredWork
    MapredWork cMrPlan = GenMapRedUtils.getMapRedWorkFromConf(conf);
    MapWork cplan = cMrPlan.getMapWork();
    cplan.addPathToAlias(inputDir, aliases);
    cplan.addPathToPartitionInfo(inputDir, new PartitionDesc(tblDesc, null));
    cplan.getAliasToWork().put(inputDirStr, topOp);
    cplan.setMapperCannotSpanPartns(true);
    return cplan;
}
Also used : Path(org.apache.hadoop.fs.Path) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) ArrayList(java.util.ArrayList) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)
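Since the method keys everything off the input directory, the resulting plan is easy to inspect: one dummy alias per path, mapped back to the parent's operator tree. A small sketch of reading those mappings back; it assumes the Path-keyed getPathToAliases() accessor of newer Hive, and because createMRWorkForMergingFiles is private, the sketch would have to live inside GenMapRedUtils itself.

import java.util.ArrayList;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.MapWork;

// Build the merge plan and dump its path-to-alias wiring.
static void inspectMergePlan(HiveConf conf, TableScanOperator topOp, FileSinkDesc fsDesc) {
    MapWork merge = createMRWorkForMergingFiles(conf, topOp, fsDesc);
    for (Map.Entry<Path, ArrayList<String>> e : merge.getPathToAliases().entrySet()) {
        // each input directory maps to itself as a dummy alias
        System.out.println(e.getKey() + " -> " + e.getValue());
    }
}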

Example 24 with MapredWork

use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.

the class GenMapRedUtils method initUnionPlan.

/**
   * Initialize the current union plan.
   *
   * @param op
   *          the reduce sink operator encountered
   * @param currUnionOp
   *          the union operator whose plan is being initialized
   * @param opProcCtx
   *          processing context
   * @param unionTask
   *          the task that will execute the union plan
   */
public static void initUnionPlan(ReduceSinkOperator op, UnionOperator currUnionOp, GenMRProcContext opProcCtx, Task<? extends Serializable> unionTask) throws SemanticException {
    Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
    MapredWork plan = (MapredWork) unionTask.getWork();
    HashMap<Operator<? extends OperatorDesc>, Task<? extends Serializable>> opTaskMap = opProcCtx.getOpTaskMap();
    opTaskMap.put(reducer, unionTask);
    plan.setReduceWork(new ReduceWork());
    plan.getReduceWork().setReducer(reducer);
    ReduceSinkDesc desc = op.getConf();
    plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());
    if (needsTagging(plan.getReduceWork())) {
        plan.getReduceWork().setNeedsTagging(true);
    }
    initUnionPlan(opProcCtx, currUnionOp, unionTask, false);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) DemuxOperator(org.apache.hadoop.hive.ql.exec.DemuxOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) Task(org.apache.hadoop.hive.ql.exec.Task) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) DependencyCollectionTask(org.apache.hadoop.hive.ql.exec.DependencyCollectionTask) Serializable(java.io.Serializable) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
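The needsTagging check above is worth a note: tagging stamps each reduce-side row with the index of the input stream it came from, which only matters when the reducer merges several distinct streams. Here is a sketch in the spirit of GenMapRedUtils.needsTagging, keyed off the JoinOperator and DemuxOperator reducers visible in this file's imports; the exact test in Hive may differ.

import org.apache.hadoop.hive.ql.exec.DemuxOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.plan.ReduceWork;

// A join (or demux) reducer merges multiple tagged input streams, so its
// rows must carry a source tag; a single-stream reducer does not need one.
static boolean needsTaggingSketch(ReduceWork rWork) {
    return rWork != null
        && (rWork.getReducer() instanceof JoinOperator
            || rWork.getReducer() instanceof DemuxOperator);
}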

Example 25 with MapredWork

use of org.apache.hadoop.hive.ql.plan.MapredWork in project hive by apache.

the class GenMapRedUtils method getMapRedWork.

/**
   * Create a new MapredWork plan and return it.
   *
   * @param parseCtx
   *          the parse context supplying the configuration and the
   *          name-to-split-sample mapping
   * @return the new plan
   */
public static MapredWork getMapRedWork(ParseContext parseCtx) {
    MapredWork work = getMapRedWorkFromConf(parseCtx.getConf());
    work.getMapWork().setNameToSplitSample(parseCtx.getNameToSplitSample());
    return work;
}
Also used : MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork)
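The heavy lifting sits in getMapRedWorkFromConf, which builds the empty plan that the earlier snippets then populate. A hypothetical sketch of its shape follows; the concrete default below is a placeholder, not Hive's actual configuration-derived value.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.MapredWork;

// Sketch: a fresh MapredWork starts with an empty MapWork and no ReduceWork;
// the method seeds it with conf-derived defaults before returning.
public static MapredWork getMapRedWorkFromConfSketch(HiveConf conf) {
    MapredWork work = new MapredWork();
    MapWork mapWork = work.getMapWork();
    // placeholder: the real getMapRedWorkFromConf reads this from the HiveConf
    mapWork.setMapperCannotSpanPartns(false);
    return work;
}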

Aggregations

MapredWork (org.apache.hadoop.hive.ql.plan.MapredWork): 43
Path (org.apache.hadoop.fs.Path): 17
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 13
Serializable (java.io.Serializable): 12
Operator (org.apache.hadoop.hive.ql.exec.Operator): 12
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 12
Task (org.apache.hadoop.hive.ql.exec.Task): 12
MapRedTask (org.apache.hadoop.hive.ql.exec.mr.MapRedTask): 12
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 12
ArrayList (java.util.ArrayList): 10
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 10
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 10
LinkedHashMap (java.util.LinkedHashMap): 9
ConditionalTask (org.apache.hadoop.hive.ql.exec.ConditionalTask): 9
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 9
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 8
ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext): 8
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 7
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 7
ReduceWork (org.apache.hadoop.hive.ql.plan.ReduceWork): 7