use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class GenMapRedUtils, method getMapRedWorkFromConf.
/**
 * Create a new plan and return it. The plan won't contain the
 * name-to-split-sample information from the parse context.
 *
 * @return the new plan
 */
public static MapredWork getMapRedWorkFromConf(HiveConf conf) {
  MapredWork mrWork = new MapredWork();
  MapWork work = mrWork.getMapWork();
  boolean mapperCannotSpanPartns =
      conf.getBoolVar(HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
  work.setMapperCannotSpanPartns(mapperCannotSpanPartns);
  work.setPathToAliases(new LinkedHashMap<Path, List<String>>());
  work.setPathToPartitionInfo(new LinkedHashMap<Path, PartitionDesc>());
  work.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>());
  return mrWork;
}
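The returned work is an empty skeleton. A minimal usage sketch, not taken from the Hive sources: the method name buildSkeleton, the alias "src", the input path, and the scanOp parameter are hypothetical, and org.apache.hadoop.fs.Path plus java.util imports are assumed.

static MapredWork buildSkeleton(HiveConf conf, TableScanOperator scanOp) {
  MapredWork mrWork = GenMapRedUtils.getMapRedWorkFromConf(conf);
  MapWork mapWork = mrWork.getMapWork();
  // the three maps start out empty; a planner fills them in afterwards, e.g.:
  Path inputPath = new Path("/warehouse/src");  // hypothetical input path
  mapWork.getPathToAliases().put(inputPath, new ArrayList<>(Collections.singletonList("src")));
  mapWork.getAliasToWork().put("src", scanOp);  // scanOp: a previously built TableScanOperator
  return mrWork;
}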
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class GenMapRedUtils, method initPlan.
/**
 * Initialize the current plan by adding it to root tasks.
 *
 * @param op
 *          the reduce sink operator encountered
 * @param opProcCtx
 *          processing context
 */
public static void initPlan(ReduceSinkOperator op, GenMRProcContext opProcCtx)
    throws SemanticException {
  // the reducer is the single child of the reduce sink
  Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
  Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = opProcCtx.getMapCurrCtx();
  GenMapRedCtx mapredCtx = mapCurrCtx.get(op.getParentOperators().get(0));
  Task<?> currTask = mapredCtx.getCurrTask();
  MapredWork plan = (MapredWork) currTask.getWork();
  HashMap<Operator<? extends OperatorDesc>, Task<?>> opTaskMap = opProcCtx.getOpTaskMap();
  TableScanOperator currTopOp = opProcCtx.getCurrTopOp();
  opTaskMap.put(reducer, currTask);
  plan.setReduceWork(new ReduceWork());
  plan.getReduceWork().setReducer(reducer);
  ReduceSinkDesc desc = op.getConf();
  plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());
  if (needsTagging(plan.getReduceWork())) {
    plan.getReduceWork().setNeedsTagging(true);
  }
  assert currTopOp != null;
  String currAliasId = opProcCtx.getCurrAliasId();
  // attach the map-side tree rooted at currTopOp to the task, but only once
  if (!opProcCtx.isSeenOp(currTask, currTopOp)) {
    setTaskPlan(currAliasId, currTopOp, currTask, false, opProcCtx);
  }
  // reset the per-walk state; the top operator and alias now belong to currTask
  currTopOp = null;
  currAliasId = null;
  opProcCtx.setCurrTask(currTask);
  opProcCtx.setCurrTopOp(currTopOp);
  opProcCtx.setCurrAliasId(currAliasId);
}
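For context, a hedged sketch of a call site, assuming a graph-walker rule that fires on each ReduceSinkOperator; the method name onReduceSink and the getCurrTask accessor are assumptions, not taken from this snippet.

// hypothetical rule body; GenMRProcContext#getCurrTask is assumed to exist
static void onReduceSink(ReduceSinkOperator rsOp, GenMRProcContext ctx) throws SemanticException {
  GenMapRedUtils.initPlan(rsOp, ctx);
  Task<?> mrTask = ctx.getCurrTask();  // the task that now owns the new ReduceWork
  // the current top operator and alias have been reset by initPlan
}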
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class GenMapRedUtils, method initUnionPlan.
/**
 * Initialize the current union plan.
 *
 * @param op
 *          the reduce sink operator encountered
 * @param currUnionOp
 *          the union operator being processed
 * @param opProcCtx
 *          processing context
 * @param unionTask
 *          the task already created for the union
 */
public static void initUnionPlan(ReduceSinkOperator op, UnionOperator currUnionOp,
    GenMRProcContext opProcCtx, Task<?> unionTask) throws SemanticException {
  Operator<? extends OperatorDesc> reducer = op.getChildOperators().get(0);
  MapredWork plan = (MapredWork) unionTask.getWork();
  HashMap<Operator<? extends OperatorDesc>, Task<?>> opTaskMap = opProcCtx.getOpTaskMap();
  opTaskMap.put(reducer, unionTask);
  plan.setReduceWork(new ReduceWork());
  plan.getReduceWork().setReducer(reducer);
  ReduceSinkDesc desc = op.getConf();
  plan.getReduceWork().setNumReduceTasks(desc.getNumReducers());
  if (needsTagging(plan.getReduceWork())) {
    plan.getReduceWork().setNeedsTagging(true);
  }
  initUnionPlan(opProcCtx, currUnionOp, unionTask, false);
}
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class GenMapRedUtils, method replaceMapWork.
/**
 * Replace the Map-side operator tree associated with targetAlias in
 * target with the Map-side operator tree associated with sourceAlias in source.
 *
 * @param sourceAlias the alias whose operator tree is taken from source
 * @param targetAlias the alias whose operator tree is replaced in target
 * @param source the MapWork providing the new operator tree
 * @param target the MapWork being rewritten
 */
public static void replaceMapWork(String sourceAlias, String targetAlias,
    MapWork source, MapWork target) {
  Map<Path, List<String>> sourcePathToAliases = source.getPathToAliases();
  Map<Path, PartitionDesc> sourcePathToPartitionInfo = source.getPathToPartitionInfo();
  Map<String, Operator<? extends OperatorDesc>> sourceAliasToWork = source.getAliasToWork();
  Map<String, PartitionDesc> sourceAliasToPartnInfo = source.getAliasToPartnInfo();
  Map<Path, List<String>> targetPathToAliases = target.getPathToAliases();
  Map<Path, PartitionDesc> targetPathToPartitionInfo = target.getPathToPartitionInfo();
  Map<String, Operator<? extends OperatorDesc>> targetAliasToWork = target.getAliasToWork();
  Map<String, PartitionDesc> targetAliasToPartnInfo = target.getAliasToPartnInfo();
  if (!sourceAliasToWork.containsKey(sourceAlias) || !targetAliasToWork.containsKey(targetAlias)) {
    // Nothing to do if there is no operator tree associated with sourceAlias in source,
    // or no operator tree associated with targetAlias in target.
    return;
  }
  if (sourceAliasToWork.size() > 1) {
    // If there are multiple aliases in source, we do not know how to merge.
    return;
  }
  // Remove unnecessary information from target
  targetAliasToWork.remove(targetAlias);
  targetAliasToPartnInfo.remove(targetAlias);
  List<Path> pathsToRemove = new ArrayList<>();
  for (Entry<Path, List<String>> entry : targetPathToAliases.entrySet()) {
    List<String> aliases = entry.getValue();
    aliases.remove(targetAlias);
    if (aliases.isEmpty()) {
      pathsToRemove.add(entry.getKey());
    }
  }
  for (Path pathToRemove : pathsToRemove) {
    targetPathToAliases.remove(pathToRemove);
    targetPathToPartitionInfo.remove(pathToRemove);
  }
  // Add new information from source to target
  targetAliasToWork.put(sourceAlias, sourceAliasToWork.get(sourceAlias));
  targetAliasToPartnInfo.putAll(sourceAliasToPartnInfo);
  targetPathToPartitionInfo.putAll(sourcePathToPartitionInfo);
  List<Path> pathsToAdd = new ArrayList<>();
  for (Entry<Path, List<String>> entry : sourcePathToAliases.entrySet()) {
    List<String> aliases = entry.getValue();
    if (aliases.contains(sourceAlias)) {
      pathsToAdd.add(entry.getKey());
    }
  }
  for (Path pathToAdd : pathsToAdd) {
    if (!targetPathToAliases.containsKey(pathToAdd)) {
      targetPathToAliases.put(pathToAdd, new ArrayList<String>());
    }
    targetPathToAliases.get(pathToAdd).add(sourceAlias);
  }
  target.setPathToAliases(targetPathToAliases);
  target.setPathToPartitionInfo(targetPathToPartitionInfo);
}
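To make the path-to-alias bookkeeping concrete, here is a self-contained toy version of the removal step, using plain strings in place of Hive's Path and operator classes; all paths and aliases below are hypothetical, and java.util imports are assumed.

Map<String, List<String>> pathToAliases = new LinkedHashMap<>();
pathToAliases.put("/data/a", new ArrayList<>(Arrays.asList("t", "u")));
pathToAliases.put("/data/b", new ArrayList<>(Arrays.asList("t")));
// drop target alias "t" everywhere, then drop paths left without any alias
for (List<String> aliases : pathToAliases.values()) {
  aliases.remove("t");
}
pathToAliases.values().removeIf(List::isEmpty);
// result: {/data/a=[u]}; /data/b vanished because "t" was its only alias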
use of org.apache.hadoop.hive.ql.exec.Operator in project hive by apache.
the class ConstantPropagateProcFactory, method foldExprFull.
/**
 * Fold the input expression desc.
 *
 * This function recursively checks whether any subexpression of the given expression
 * can be evaluated to a constant and replaces such a subexpression with the constant.
 * If the expression is a deterministic UDF and all of its subexpressions are constants,
 * the value is computed immediately (at compile time rather than at runtime).
 * e.g.:
 * concat(year, month) => 200112 for year=2001, month=12, since concat is a deterministic UDF
 * unix_timestamp(time) => unix_timestamp(123) for time=123, since unix_timestamp is a nondeterministic UDF
 * @param desc the expression to fold
 * @param constants the currently propagated constant map
 * @param cppCtx constant propagation context
 * @param op the operator being processed
 * @param tag index into op's parent operators, used when resolving column references
 * @param propagate if true, assignment expressions will be added to constants
 * @return the folded expression
 * @throws UDFArgumentException
 */
private static ExprNodeDesc foldExprFull(ExprNodeDesc desc, Map<ColumnInfo, ExprNodeDesc> constants,
    ConstantPropagateProcCtx cppCtx, Operator<? extends Serializable> op, int tag,
    boolean propagate) throws UDFArgumentException {
  // Combine a NOT operator with its child operator. Otherwise the bottom-up
  // optimization below could produce an incorrect result: e.g. not(x > 3 and x is not null)
  // must not be optimized to not(x > 3), but to (x <= 3 or x is null).
  desc = foldNegative(desc);
  if (desc instanceof ExprNodeGenericFuncDesc) {
    ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) desc;
    GenericUDF udf = funcDesc.getGenericUDF();
    boolean propagateNext = propagate && propagatableUdfs.contains(udf.getClass());
    List<ExprNodeDesc> newExprs = new ArrayList<ExprNodeDesc>();
    for (ExprNodeDesc childExpr : desc.getChildren()) {
      newExprs.add(foldExpr(childExpr, constants, cppCtx, op, tag, propagateNext));
    }
    // Don't evaluate a nondeterministic function, since its value can only be
    // computed at runtime.
    if (!isConstantFoldableUdf(udf, newExprs)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Function " + udf.getClass() + " is nondeterministic. Don't evaluate immediately.");
      }
      ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs);
      return desc;
    } else {
      // If all child expressions of a deterministic function are constants, evaluate the UDF immediately
      ExprNodeDesc constant = evaluateFunction(udf, newExprs, desc.getChildren());
      if (constant != null) {
        LOG.debug("Folding expression: {} -> {}", desc, constant);
        return constant;
      } else {
        // Check if the function can be short-cut.
        ExprNodeDesc shortcut = shortcutFunction(udf, newExprs, op);
        if (shortcut != null) {
          LOG.debug("Folding expression: {} -> {}", desc, shortcut);
          return shortcut;
        }
        ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs);
      }
      // If, in some selected binary operators (=, is null, etc.), one of the operands is
      // constant, add them to colToConstants as half-deterministic columns.
      if (propagate) {
        propagate(udf, newExprs, op.getSchema(), constants);
      }
    }
    return desc;
  } else if (desc instanceof ExprNodeColumnDesc) {
    if (op.getParentOperators() == null || op.getParentOperators().isEmpty()) {
      return desc;
    }
    Operator<? extends Serializable> parent = op.getParentOperators().get(tag);
    ExprNodeDesc col = evaluateColumn((ExprNodeColumnDesc) desc, cppCtx, parent);
    if (col != null) {
      LOG.debug("Folding expression: {} -> {}", desc, col);
      return col;
    }
  }
  return desc;
}
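The recursion is easier to see on a toy expression type. A minimal sketch of the same fold-children-first, evaluate-if-all-constant idea in plain Java; the Expr, Const, and Add classes are invented for illustration, not Hive's ExprNodeDesc hierarchy.

abstract static class Expr { }
static class Const extends Expr {
  final int v;
  Const(int v) { this.v = v; }
}
static class Add extends Expr {
  final Expr l, r;
  Add(Expr l, Expr r) { this.l = l; this.r = r; }
}

// fold children first; if every child folded to a constant, evaluate the
// (deterministic) operator immediately, otherwise keep the partially folded node
static Expr fold(Expr e) {
  if (!(e instanceof Add)) {
    return e;  // constants and leaves fold to themselves
  }
  Add add = (Add) e;
  Expr l = fold(add.l);
  Expr r = fold(add.r);
  if (l instanceof Const && r instanceof Const) {
    return new Const(((Const) l).v + ((Const) r).v);  // "compile-time" evaluation
  }
  return new Add(l, r);
}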