Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
Class ColumnPrunerProcCtx, method getColsFromSelectExpr.
/**
 * Builds the list of field nodes naming the internal columns that a select
 * operator refers to. Select operators use this method in place of
 * genColLists, which serves the remaining operator kinds.
 *
 * <p>When the operator's descriptor reports {@code isSelStarNoCompute()}, one
 * field node is created per column of the operator's schema signature.
 * Otherwise every expression of the descriptor's column list is merged into
 * the result through {@code mergeFieldNodesWithDesc}.
 *
 * @param op The select operator.
 * @return a list of field nodes representing the internal column names.
 */
public List<FieldNode> getColsFromSelectExpr(SelectOperator op) {
  List<FieldNode> fieldNodes = new ArrayList<>();
  SelectDesc selectDesc = op.getConf();

  if (!selectDesc.isSelStarNoCompute()) {
    // Regular select: accumulate the columns referenced by each expression.
    for (ExprNodeDesc selectExpr : selectDesc.getColList()) {
      fieldNodes = mergeFieldNodesWithDesc(fieldNodes, selectExpr);
    }
    return fieldNodes;
  }

  // "select *" without computation: take every column of the schema as is.
  for (ColumnInfo column : op.getSchema().getSignature()) {
    fieldNodes.add(new FieldNode(column.getInternalName()));
  }
  return fieldNodes;
}
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
Class ColumnPrunerProcCtx, method handleFilterUnionChildren.
/**
 * Inserts a select operator between a filter operator and each of its direct
 * union children whose pruned column list has a different size than the
 * filter's own pruned column list.
 *
 * <p>The method does nothing unless {@code curOp} is a {@code FilterOperator}
 * with a non-null child list and a non-empty entry in {@code prunedColLists}.
 * For every qualifying union child, the inserted select operator projects
 * exactly those columns of the child's schema that are found in the child's
 * pruned column list, keeping their internal names, types, table aliases and
 * aliases.
 *
 * @param curOp
 *          The filter operator whose children need to be handled.
 * @throws SemanticException propagated from the operations invoked here
 */
public void handleFilterUnionChildren(Operator<? extends OperatorDesc> curOp) throws SemanticException {
  if (curOp.getChildOperators() == null || !(curOp instanceof FilterOperator)) {
    return;
  }
  List<FieldNode> parentPrunList = prunedColLists.get(curOp);
  if (parentPrunList == null || parentPrunList.isEmpty()) {
    return;
  }

  // Iterate over a snapshot of the children: the loop body detaches the union
  // child from curOp and attaches a new select operator to it, so walking the
  // live child list could fail fast or silently skip siblings.
  List<Operator<? extends OperatorDesc>> children =
      new ArrayList<Operator<? extends OperatorDesc>>(curOp.getChildOperators());
  for (Operator<? extends OperatorDesc> child : children) {
    if (!(child instanceof UnionOperator)) {
      continue;
    }
    List<FieldNode> prunList = genColLists(child);
    if (prunList == null || prunList.isEmpty() || parentPrunList.size() == prunList.size()) {
      // Nothing to prune for this child, or both sides already agree in size.
      continue;
    }

    ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    ArrayList<String> outputColNames = new ArrayList<String>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
    ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
    for (ColumnInfo colInfo : child.getSchema().getSignature()) {
      if (lookupColumn(prunList, colInfo.getInternalName()) == null) {
        // Column is not part of the child's pruned list; leave it out.
        continue;
      }
      ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
      exprs.add(colDesc);
      outputColNames.add(colInfo.getInternalName());
      ColumnInfo newCol = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
      newCol.setAlias(colInfo.getAlias());
      outputRS.add(newCol);
      colExprMap.put(colInfo.getInternalName(), colDesc);
    }

    // Rewire filter -> union into filter -> select -> union.
    SelectDesc select = new SelectDesc(exprs, outputColNames, false);
    curOp.removeChild(child);
    SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(select, new RowSchema(outputRS), curOp);
    OperatorFactory.makeChild(sel, child);
    sel.setColumnExprMap(colExprMap);
  }
}
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
Class ColumnPrunerProcFactory, method getPruneReduceSinkOpRetainFlags.
/**
 * Computes, for each value expression of a reduce sink operator, whether it
 * has to be retained.
 *
 * <p>An expression is retained when it references no columns at all
 * ({@code getCols()} is null or empty) or when at least one of the columns it
 * references is contained in {@code retainedParentOpOutputCols}.
 *
 * @param retainedParentOpOutputCols column names that are kept
 * @param reduce the reduce sink operator whose value columns are inspected
 * @return one flag per value expression, in the order of the value columns
 */
private static boolean[] getPruneReduceSinkOpRetainFlags(List<String> retainedParentOpOutputCols, ReduceSinkOperator reduce) {
  java.util.ArrayList<ExprNodeDesc> valueExprs = reduce.getConf().getValueCols();
  int exprCount = valueExprs.size();
  // Array elements start out false, i.e. "do not retain".
  boolean[] retain = new boolean[exprCount];

  for (int idx = 0; idx < exprCount; idx++) {
    List<String> referencedCols = valueExprs.get(idx).getCols();
    if (referencedCols == null || referencedCols.isEmpty()) {
      // No column references: always keep the expression.
      retain[idx] = true;
      continue;
    }
    for (String colName : referencedCols) {
      if (retainedParentOpOutputCols.contains(colName)) {
        retain[idx] = true;
        break;
      }
    }
  }
  return retain;
}
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
Class ConstantPropagateProcFactory, method foldExprFull.
/**
 * Folds the given expression descriptor.
 *
 * <p>The expression is first passed through {@code foldNegative}. For a
 * generic function expression, every child is folded through {@code foldExpr}
 * and the function itself is then either left in place with the folded
 * children (when {@code isDeterministicUdf} rejects it), replaced by the
 * result of {@code evaluateFunction}, or replaced by the result of
 * {@code shortcutFunction}. For a column expression, the column is looked up
 * through {@code evaluateColumn} against the parent operator selected by
 * {@code tag}. Any other expression is returned unchanged.
 * e.g.:
 *   concat(year, month) => 200112 for year=2001, month=12 since concat is deterministic UDF
 *   unix_timestamp(time) => unix_timestamp(123) for time=123 since unix_timestamp is nondeterministic UDF
 *
 * @param desc folding expression
 * @param constants current propagated constant map
 * @param cppCtx constant propagation context, handed on to the helpers
 * @param op processing operator
 * @param tag index of the parent of {@code op} that column expressions are evaluated against
 * @param propagate if true, assignment expressions will be added to constants.
 * @return fold expression
 * @throws UDFArgumentException
 */
private static ExprNodeDesc foldExprFull(ExprNodeDesc desc, Map<ColumnInfo, ExprNodeDesc> constants, ConstantPropagateProcCtx cppCtx, Operator<? extends Serializable> op, int tag, boolean propagate) throws UDFArgumentException {
  // Push NOT into its operand before folding bottom-up. Otherwise an expression such as
  // not(x > 3 and x is not null) could be wrongly reduced to not(x > 3) instead of
  // (x <= 3 or x is null).
  desc = foldNegative(desc);

  if (desc instanceof ExprNodeGenericFuncDesc) {
    ExprNodeGenericFuncDesc function = (ExprNodeGenericFuncDesc) desc;
    GenericUDF udf = function.getGenericUDF();
    boolean propagateToChildren = propagate && propagatableUdfs.contains(udf.getClass());

    List<ExprNodeDesc> foldedChildren = new ArrayList<ExprNodeDesc>();
    for (ExprNodeDesc operand : function.getChildren()) {
      foldedChildren.add(foldExpr(operand, constants, cppCtx, op, tag, propagateToChildren));
    }

    // A nondeterministic function can only be computed at runtime: keep it, with folded children.
    if (!isDeterministicUdf(udf, foldedChildren)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Function " + udf.getClass() + " is undeterministic. Don't evaluate immediately.");
      }
      function.setChildren(foldedChildren);
      return desc;
    }

    // Try a full compile-time evaluation first; if that yields nothing, try to short cut
    // the function.
    ExprNodeDesc replacement = evaluateFunction(udf, foldedChildren, function.getChildren());
    if (replacement == null) {
      replacement = shortcutFunction(udf, foldedChildren, op);
    }
    if (replacement != null) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Folding expression:" + desc + " -> " + replacement);
      }
      return replacement;
    }

    // The function stays, but with its folded children.
    function.setChildren(foldedChildren);
    // When propagation is requested, let propagate() record what this function implies
    // about its operands in the constants map (half-deterministic columns).
    if (propagate) {
      propagate(udf, foldedChildren, op.getSchema(), constants);
    }
    return desc;
  }

  if (desc instanceof ExprNodeColumnDesc) {
    boolean hasParents = op.getParentOperators() != null && !op.getParentOperators().isEmpty();
    if (hasParents) {
      Operator<? extends Serializable> parent = op.getParentOperators().get(tag);
      ExprNodeDesc resolved = evaluateColumn((ExprNodeColumnDesc) desc, cppCtx, parent);
      if (resolved != null) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("Folding expression:" + desc + " -> " + resolved);
        }
        return resolved;
      }
    }
  }
  return desc;
}
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
Class ConstantPropagateProcFactory, method foldNegative.
/**
 * Merges a logical not() with the function it is applied to, where possible.
 *
 * <p>Only expressions of the form not(f(...)) are touched; anything else is
 * returned as is. Depending on f:
 * <ul>
 * <li>comparison, "is null" or "is not null": replaced by
 * {@code f.negative()} applied to the recursively processed operands;</li>
 * <li>and / or: replaced by {@code f.negative()} applied to the negation of
 * each operand, each negation being processed recursively;</li>
 * <li>not: the double negation is dropped and the inner operand is processed
 * recursively;</li>
 * <li>any other function: the not() is kept and only the operands of f are
 * processed recursively (f is updated in place).</li>
 * </ul>
 *
 * @param desc the expression to be evaluated
 * @return the new expression to be replaced
 * @throws UDFArgumentException
 */
private static ExprNodeDesc foldNegative(ExprNodeDesc desc) throws UDFArgumentException {
  if (!(desc instanceof ExprNodeGenericFuncDesc)) {
    return desc;
  }
  ExprNodeGenericFuncDesc notExpr = (ExprNodeGenericFuncDesc) desc;
  if (!(notExpr.getGenericUDF() instanceof GenericUDFOPNot)) {
    return desc;
  }
  ExprNodeDesc operand = notExpr.getChildren().get(0);
  if (!(operand instanceof ExprNodeGenericFuncDesc)) {
    return desc;
  }

  ExprNodeGenericFuncDesc operandFunc = (ExprNodeGenericFuncDesc) operand;
  GenericUDF operandUdf = operandFunc.getGenericUDF();
  List<ExprNodeDesc> operandArgs = operand.getChildren();
  List<ExprNodeDesc> rewrittenArgs = new ArrayList<ExprNodeDesc>();

  boolean comparisonOrNullCheck = operandUdf instanceof GenericUDFBaseCompare
      || operandUdf instanceof GenericUDFOPNull
      || operandUdf instanceof GenericUDFOPNotNull;
  if (comparisonOrNullCheck) {
    // not(a <op> b) becomes a <negated op> b.
    for (ExprNodeDesc arg : operandArgs) {
      rewrittenArgs.add(foldNegative(arg));
    }
    return ExprNodeGenericFuncDesc.newInstance(operandUdf.negative(), rewrittenArgs);
  }

  if (operandUdf instanceof GenericUDFOPAnd || operandUdf instanceof GenericUDFOPOr) {
    // Push the negation down onto every operand of the and/or.
    for (ExprNodeDesc arg : operandArgs) {
      ExprNodeDesc negatedArg = ExprNodeGenericFuncDesc.newInstance(new GenericUDFOPNot(), Arrays.asList(arg));
      rewrittenArgs.add(foldNegative(negatedArg));
    }
    return ExprNodeGenericFuncDesc.newInstance(operandUdf.negative(), rewrittenArgs);
  }

  if (operandUdf instanceof GenericUDFOPNot) {
    // not(not(x)) collapses to x.
    return foldNegative(operand.getChildren().get(0));
  }

  // Functions such as if() cannot absorb the negation: keep not() where it is
  // and only process the operands of the inner function.
  for (ExprNodeDesc arg : operandArgs) {
    rewrittenArgs.add(foldNegative(arg));
  }
  operandFunc.setChildren(rewrittenArgs);
  return notExpr;
}
Aggregations