Example 36 with Operator

use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

the class GenMapRedUtils method getMapRedWorkFromConf.

/**
 * Create a new plan and return it. The plan won't contain the
 * name-to-split-sample information from the parse context.
 *
 * @return the new plan
 */
public static MapredWork getMapRedWorkFromConf(HiveConf conf) {
    MapredWork mrWork = new MapredWork();
    MapWork work = mrWork.getMapWork();
    boolean mapperCannotSpanPartns = conf.getBoolVar(HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
    work.setMapperCannotSpanPartns(mapperCannotSpanPartns);
    work.setPathToAliases(new LinkedHashMap<Path, List<String>>());
    work.setPathToPartitionInfo(new LinkedHashMap<Path, PartitionDesc>());
    work.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>());
    return mrWork;
}
Also used : Path(org.apache.hadoop.fs.Path) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) DemuxOperator(org.apache.hadoop.hive.ql.exec.DemuxOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) List(java.util.List) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
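For context, a minimal caller-side sketch (hypothetical driver code, not from the Hive source; it assumes the Hive libraries are on the classpath) showing what the returned plan looks like:

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.MapredWork;

public class GetMapRedWorkSketch {
    public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // The factory reads HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS from conf
        // and hands back a plan whose map-side containers are empty.
        MapredWork mrWork = GenMapRedUtils.getMapRedWorkFromConf(conf);
        MapWork mapWork = mrWork.getMapWork();
        // Callers are expected to fill in pathToAliases, pathToPartitionInfo
        // and aliasToWork afterwards.
        System.out.println("aliasToWork empty: " + mapWork.getAliasToWork().isEmpty());
    }
}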

Example 37 with Operator

use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

the class GenMapRedUtils method initPlan.

/**
 * Initialize the current plan by adding it to root tasks.
 *
 * @param op
 *          the reduce sink operator encountered
 * @param opProcCtx
 *          processing context
 */
public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx) throws SemanticException {
    Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
    Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
    Task<?> currTask = mapredCtx.getCurrTask();
    MapredWork plan = (MapredWork) currTask.getWork();
    HashMap<Operator<? extends OperatorDesc>, Task<?>> opTaskMap = opProcCtx.getOpTaskMap();
    TableScanOperator currTopOp = opProcCtx.getCurrTopOp();
    opTaskMap.put(reducer, currTask);
    plan.setReduceWork(new ReduceWork());
    plan.getReduceWork().setReducer(reducer);
    ReduceSinkDesc desc = op.getConf();
    plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());
    if (needsTagging(plan.getReduceWork())) {
        plan.getReduceWork().setNeedsTagging(true);
    }
    assert currTopOp != null;
    String currAliasId = opProcCtx.getCurrAliasId();
    if (!opProcCtx.isSeenOp(currTask, currTopOp)) {
        setTaskPlan(currAliasId, currTopOp, currTask, false, opProcCtx);
    }
    currTopOp = null;
    currAliasId = null;
    opProcCtx.setCurrTask(currTask);
    opProcCtx.setCurrTopOp(currTopOp);
    opProcCtx.setCurrAliasId(currAliasId);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) DemuxOperator(org.apache.hadoop.hive.ql.exec.DemuxOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) Task(org.apache.hadoop.hive.ql.exec.Task) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) DependencyCollectionTask(org.apache.hadoop.hive.ql.exec.DependencyCollectionTask) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) GenMapRedCtx(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
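The essential effect of initPlan is to hang a ReduceWork off the current MapredWork. A condensed sketch of just that wiring (an illustrative helper, not part of GenMapRedUtils; the context bookkeeping is omitted):

import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.ReduceWork;

public class AttachReducerSketch {
    // Mirrors the ReduceWork setup performed inside initPlan: create the
    // reduce-side work, set its root operator and reducer count, and attach it.
    static void attachReducer(MapredWork plan,
                              Operator<? extends OperatorDesc> reducer,
                              int numReduceTasks) {
        ReduceWork reduceWork = new ReduceWork();
        reduceWork.setReducer(reducer);
        reduceWork.setNumReduceTasks(numReduceTasks);
        plan.setReduceWork(reduceWork);
    }
}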

Example 38 with Operator

use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

the class GenMapRedUtils method initUnionPlan.

/**
 * Initialize the current union plan.
 *
 * @param op
 *          the reduce sink operator encountered
 * @param currUnionOp
 *          the union operator whose plan is being initialized
 * @param opProcCtx
 *          processing context
 * @param unionTask
 *          the task that owns the union's map-reduce work
 */
public static void initUnionPlan(ReduceSinkOperator op, UnionOperator currUnionOp, GenMRProcContext opProcCtx, Task<?> unionTask) throws SemanticException {
    Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
    MapredWork plan = (MapredWork) unionTask.getWork();
    HashMap<Operator<? extends OperatorDesc>, Task<?>> opTaskMap = opProcCtx.getOpTaskMap();
    opTaskMap.put(reducer, unionTask);
    plan.setReduceWork(new ReduceWork());
    plan.getReduceWork().setReducer(reducer);
    ReduceSinkDesc desc = op.getConf();
    plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());
    if (needsTagging(plan.getReduceWork())) {
        plan.getReduceWork().setNeedsTagging(true);
    }
    initUnionPlan(opProcCtx, currUnionOp, unionTask, false);
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) DemuxOperator(org.apache.hadoop.hive.ql.exec.DemuxOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) Task(org.apache.hadoop.hive.ql.exec.Task) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) DependencyCollectionTask(org.apache.hadoop.hive.ql.exec.DependencyCollectionTask) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
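A hypothetical call site for the method above (all names here are illustrative): when the plan walker reaches a ReduceSinkOperator that is fed through a UnionOperator, the union's existing task takes ownership of the reduce work:

import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext;
import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class InitUnionPlanSketch {
    static void wireUnionReducer(ReduceSinkOperator rs, UnionOperator union,
                                 GenMRProcContext ctx, Task<?> unionTask)
            throws SemanticException {
        // unionTask must already carry the MapredWork for the union branch;
        // initUnionPlan attaches the reduce side to it.
        GenMapRedUtils.initUnionPlan(rs, union, ctx, unionTask);
    }
}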

Example 39 with Operator

use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

the class GenMapRedUtils method replaceMapWork.

/**
 * Replace the Map-side operator tree associated with targetAlias in
 * target with the Map-side operator tree associated with sourceAlias in source.
 * @param sourceAlias the alias whose operator tree is taken from source
 * @param targetAlias the alias whose operator tree in target is replaced
 * @param source the MapWork providing the replacement operator tree
 * @param target the MapWork whose operator tree is replaced
 */
public static void replaceMapWork(String sourceAlias, String targetAlias, MapWork source, MapWork target) {
    Map<Path, List<String>> sourcePathToAliases = source.getPathToAliases();
    Map<Path, PartitionDesc> sourcePathToPartitionInfo = source.getPathToPartitionInfo();
    Map<String, Operator<? extends OperatorDesc>> sourceAliasToWork = source.getAliasToWork();
    Map<String, PartitionDesc> sourceAliasToPartnInfo = source.getAliasToPartnInfo();
    Map<Path, List<String>> targetPathToAliases = target.getPathToAliases();
    Map<Path, PartitionDesc> targetPathToPartitionInfo = target.getPathToPartitionInfo();
    Map<String, Operator<? extends OperatorDesc>> targetAliasToWork = target.getAliasToWork();
    Map<String, PartitionDesc> targetAliasToPartnInfo = target.getAliasToPartnInfo();
    if (!sourceAliasToWork.containsKey(sourceAlias) || !targetAliasToWork.containsKey(targetAlias)) {
        // Nothing to do if there is no operator tree associated with sourceAlias
        // in source or no operator tree associated with targetAlias in target.
        return;
    }
    if (sourceAliasToWork.size() > 1) {
        // If there are multiple aliases in source, we do not know how to merge.
        return;
    }
    // Remove unnecessary information from target
    targetAliasToWork.remove(targetAlias);
    targetAliasToPartnInfo.remove(targetAlias);
    List<Path> pathsToRemove = new ArrayList<>();
    for (Entry<Path, List<String>> entry : targetPathToAliases.entrySet()) {
        List<String> aliases = entry.getValue();
        aliases.remove(targetAlias);
        if (aliases.isEmpty()) {
            pathsToRemove.add(entry.getKey());
        }
    }
    for (Path pathToRemove : pathsToRemove) {
        targetPathToAliases.remove(pathToRemove);
        targetPathToPartitionInfo.remove(pathToRemove);
    }
    // Add new information from source to target
    targetAliasToWork.put(sourceAlias, sourceAliasToWork.get(sourceAlias));
    targetAliasToPartnInfo.putAll(sourceAliasToPartnInfo);
    targetPathToPartitionInfo.putAll(sourcePathToPartitionInfo);
    List<Path> pathsToAdd = new ArrayList<>();
    for (Entry<Path, List<String>> entry : sourcePathToAliases.entrySet()) {
        List<String> aliases = entry.getValue();
        if (aliases.contains(sourceAlias)) {
            pathsToAdd.add(entry.getKey());
        }
    }
    for (Path pathToAdd : pathsToAdd) {
        if (!targetPathToAliases.containsKey(pathToAdd)) {
            targetPathToAliases.put(pathToAdd, new ArrayList<String>());
        }
        targetPathToAliases.get(pathToAdd).add(sourceAlias);
    }
    target.setPathToAliases(targetPathToAliases);
    target.setPathToPartitionInfo(targetPathToPartitionInfo);
}
Also used : Path(org.apache.hadoop.fs.Path) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) DemuxOperator(org.apache.hadoop.hive.ql.exec.DemuxOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) ArrayList(java.util.ArrayList) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) List(java.util.List) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
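A hedged usage sketch (alias names are placeholders; real callers pass MapWorks taken from the two MapredWork plans being merged). Note the guard clauses above: the call is a no-op unless sourceAlias exists in source, targetAlias exists in target, and source contains exactly one alias:

import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
import org.apache.hadoop.hive.ql.plan.MapWork;

public class ReplaceMapWorkSketch {
    static void mergeSingleAliasWork(MapWork sourceWork, MapWork targetWork) {
        // If the preconditions hold, "tgt_alias" is removed from targetWork and
        // the operator tree, partition info and paths of "src_alias" are copied
        // over from sourceWork.
        GenMapRedUtils.replaceMapWork("src_alias", "tgt_alias", sourceWork, targetWork);
    }
}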

Example 40 with Operator

use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.

the class ConstantPropagateProcFactory method foldExprFull.

/**
 * Fold the input expression desc.
 *
 * This function recursively checks whether any subexpression of the specified
 * expression can be evaluated to a constant and replaces such subexpressions
 * with their constant values. If the expression is a call to a deterministic UDF
 * and all of its subexpressions are constants, the value is computed immediately
 * (at compile time rather than at runtime). For example:
 *   concat(year, month) => 200112 for year=2001, month=12, since concat is a deterministic UDF
 *   unix_timestamp(time) => unix_timestamp(123) for time=123, since unix_timestamp is a nondeterministic UDF
 * @param desc the expression to fold
 * @param constants the currently propagated constant map
 * @param cppCtx the constant-propagation processing context
 * @param op the operator being processed
 * @param tag the parent-operator index used when resolving column references
 * @param propagate if true, assignment expressions will be added to constants
 * @return the folded expression
 * @throws UDFArgumentException
 */
private static ExprNodeDesc foldExprFull(ExprNodeDesc desc, Map<ColumnInfo, ExprNodeDesc> constants, ConstantPropagateProcCtx cppCtx, Operator<? extends Serializable> op, int tag, boolean propagate) throws UDFArgumentException {
    // Fold the NOT operator into its child expression first. Otherwise, the
    // bottom-up optimization below could produce an incorrect result: for example,
    // not(x > 3 and x is not null) must not be optimized to not(x > 3); it should
    // become (x <= 3 or x is null).
    desc = foldNegative(desc);
    if (desc instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) desc;
        GenericUDF udf = funcDesc.getGenericUDF();
        boolean propagateNext = propagate && propagatableUdfs.contains(udf.getClass());
        List<ExprNodeDesc> newExprs = new ArrayList<ExprNodeDesc>();
        for (ExprNodeDesc childExpr : desc.getChildren()) {
            newExprs.add(foldExpr(childExpr, constants, cppCtx, op, tag, propagateNext));
        }
        // Don't evaluate a nondeterministic function, since its value can only be
        // calculated at runtime.
        if (!isConstantFoldableUdf(udf, newExprs)) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Function " + udf.getClass() + " is undeterministic. Don't evaluate immediately.");
            }
            ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs);
            return desc;
        } else {
            // If all child expressions of a deterministic function are constants,
            // evaluate the UDF immediately.
            ExprNodeDesc constant = evaluateFunction(udf, newExprs, desc.getChildren());
            if (constant != null) {
                LOG.debug("Folding expression: {} -> {}", desc, constant);
                return constant;
            } else {
                // Check whether the function can be short-cut.
                ExprNodeDesc shortcut = shortcutFunction(udf, newExprs, op);
                if (shortcut != null) {
                    LOG.debug("Folding expression: {} -> {}", desc, shortcut);
                    return shortcut;
                }
                ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs);
            }
            // If one side of the expression is a constant, add the other side's
            // column to the constants map as a half-deterministic column.
            if (propagate) {
                propagate(udf, newExprs, op.getSchema(), constants);
            }
        }
        return desc;
    } else if (desc instanceof ExprNodeColumnDesc) {
        if (op.getParentOperators() == null || op.getParentOperators().isEmpty()) {
            return desc;
        }
        Operator<? extends Serializable> parent = op.getParentOperators().get(tag);
        ExprNodeDesc col = evaluateColumn((ExprNodeColumnDesc) desc, cppCtx, parent);
        if (col != null) {
            LOG.debug("Folding expression: {} -> {}", desc, col);
            return col;
        }
    }
    return desc;
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) Serializable(java.io.Serializable) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ArrayList(java.util.ArrayList) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
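To make the recursion concrete, here is a toy, self-contained folder over a minimal expression tree. It is not Hive's ExprNodeDesc API; every type and name below is illustrative (and it uses Java 16+ records). Deterministic calls whose children all fold to constants are evaluated immediately, mirroring foldExprFull; nondeterministic calls are left in place:

import java.util.List;
import java.util.function.Function;

public class ToyConstantFolder {
    interface Expr {}
    record Const(Object value) implements Expr {}
    record Col(String name) implements Expr {}
    record Call(String name, boolean deterministic,
                Function<List<Object>, Object> eval, List<Expr> args) implements Expr {}

    // Recursively fold: a deterministic call whose children all folded to
    // constants is evaluated immediately; anything else is left in place.
    static Expr fold(Expr e) {
        if (!(e instanceof Call c)) {
            return e;
        }
        List<Expr> folded = c.args().stream().map(ToyConstantFolder::fold).toList();
        boolean allConst = c.deterministic()
                && folded.stream().allMatch(a -> a instanceof Const);
        if (allConst) {
            List<Object> vals = folded.stream().map(a -> ((Const) a).value()).toList();
            return new Const(c.eval().apply(vals));
        }
        return new Call(c.name(), c.deterministic(), c.eval(), folded);
    }

    public static void main(String[] args) {
        // Mirrors the javadoc's concat example: constant inputs fold to a constant.
        Expr concat = new Call("concat", true,
                vs -> "" + vs.get(0) + vs.get(1),
                List.of(new Const(2001), new Const(12)));
        System.out.println(fold(concat)); // Const[value=200112]
        // A nondeterministic call is never evaluated at fold time.
        Expr ts = new Call("unix_timestamp", false,
                vs -> System.currentTimeMillis() / 1000L,
                List.of(new Const(123)));
        System.out.println(fold(ts)); // remains a Call
    }
}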

Aggregations

Operator (org.apache.hadoop.hive.ql.exec.Operator): 215
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 167
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 156
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 134
ArrayList (java.util.ArrayList): 123
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 119
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 118
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 107
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 103
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 97
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 85
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 85
SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator): 79
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 71
HashMap (java.util.HashMap): 65
LinkedHashMap (java.util.LinkedHashMap): 64
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 62
LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator): 60
AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator): 59
DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator): 52