Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
The class TezCompiler, method markSemiJoinForDPP.
private void markSemiJoinForDPP(OptimizeTezProcContext procCtx) throws SemanticException {
  if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) {
    // Not needed without semi-join reduction
    return;
  }
  // Stores the TableScan operators already processed to avoid redoing them.
  Map<TableScanOperator, TableScanOperator> tsOps = new HashMap<>();
  Map<ReduceSinkOperator, SemiJoinBranchInfo> map = procCtx.parseContext.getRsToSemiJoinBranchInfo();
  for (ReduceSinkOperator rs : map.keySet()) {
    SemiJoinBranchInfo sjInfo = map.get(rs);
    TableScanOperator ts = sjInfo.getTsOp();
    TableScanOperator tsInMap = tsOps.putIfAbsent(ts, ts);
    if (tsInMap != null) {
      // Already processed, skip
      continue;
    }
    if (sjInfo.getIsHint() || !sjInfo.getShouldRemove()) {
      continue;
    }
    // A TS can have multiple branches due to DPP or semijoin optimization.
    // Use DFS to traverse all the branches until an RS or DPP is hit.
    Deque<Operator<?>> deque = new LinkedList<>();
    deque.add(ts);
    while (!deque.isEmpty()) {
      Operator<?> op = deque.pollLast();
      if (op instanceof AppMasterEventOperator
          && ((AppMasterEventOperator) op).getConf() instanceof DynamicPruningEventDesc) {
        // DPP. Now look up nDVs on both sides to see the selectivity.
        // <Parent Ops>-SEL-GB1-RS1-GB2-RS2
        SelectOperator selOp = null;
        try {
          selOp = (SelectOperator) (rs.getParentOperators().get(0)
              .getParentOperators().get(0)
              .getParentOperators().get(0)
              .getParentOperators().get(0));
        } catch (NullPointerException e) {
          LOG.warn("markSemiJoinForDPP : Null pointer exception caught while accessing semijoin operators");
          assert false;
          return;
        }
        try {
          // If stats are not available, just assume it is a useful edge.
          Statistics stats = selOp.getStatistics();
          ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(selOp.getConf().getColList().get(0));
          long nDVs = stats.getColumnStatisticsFromColName(colExpr.getColumn()).getCountDistint();
          if (nDVs > 0) {
            // Look up nDVs on the TS side.
            RuntimeValuesInfo rti = procCtx.parseContext.getRsToRuntimeValuesInfoMap().get(rs);
            ExprNodeDesc tsExpr = rti.getTsColExpr();
            FilterOperator fil = (FilterOperator) (ts.getChildOperators().get(0));
            Statistics filStats = fil.getStatistics();
            ExprNodeColumnDesc tsColExpr = ExprNodeDescUtils.getColumnExpr(tsExpr);
            long nDVsOfTS = filStats.getColumnStatisticsFromColName(tsColExpr.getColumn()).getCountDistint();
            double nDVsOfTSFactored = nDVsOfTS * procCtx.conf.getFloatVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_DPP_FACTOR);
            if ((long) nDVsOfTSFactored > nDVs) {
              if (LOG.isDebugEnabled()) {
                LOG.debug("nDVs = " + nDVs + ", nDVsOfTS = " + nDVsOfTS + " and nDVsOfTSFactored = " + nDVsOfTSFactored
                    + ". Adding semijoin branch from ReduceSink " + rs + " to TS " + sjInfo.getTsOp());
              }
              sjInfo.setShouldRemove(false);
            }
          }
        } catch (NullPointerException e) {
          sjInfo.setShouldRemove(false);
        }
        break;
      }
      if (op instanceof ReduceSinkOperator) {
        // Done with this branch
        continue;
      }
      deque.addAll(op.getChildOperators());
    }
  }
}
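The decision above reduces to a single comparison once both column statistics have been resolved. Below is a minimal standalone sketch of that heuristic; it is not part of the Hive source, and the class and method names are made up for illustration.

public final class SemiJoinDppHeuristicSketch {

  /**
   * Mirrors the condition under which markSemiJoinForDPP calls
   * sjInfo.setShouldRemove(false): statistics are available (nDVsOfDppSource > 0)
   * and the factored distinct-value count on the TableScan side exceeds the
   * distinct-value count feeding the dynamic partition pruning event.
   */
  public static boolean keepSemiJoinBranch(long nDVsOfDppSource, long nDVsOfTS, float dppFactor) {
    if (nDVsOfDppSource <= 0) {
      return false;
    }
    double nDVsOfTSFactored = nDVsOfTS * dppFactor;
    return (long) nDVsOfTSFactored > nDVsOfDppSource;
  }

  public static void main(String[] args) {
    // 1,000,000 distinct values on the scan side vs. 10,000 on the DPP side with
    // factor 1.0f: the semijoin branch is kept (prints true).
    System.out.println(keepSemiJoinBranch(10_000L, 1_000_000L, 1.0f));
  }
}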
Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
The class TypeCheckProcFactory, method processGByExpr.
/**
 * Function to do groupby subexpression elimination. This is called by all the
 * processors initially. As an example, consider the query
 *   select a+b, count(1) from T group by a+b;
 * Then a+b is already precomputed in the group by operator's key, so we
 * substitute a+b in the select list with the internal column name of the a+b
 * expression that appears in the input row resolver.
 *
 * @param nd
 *          The node that is being inspected.
 * @param procCtx
 *          The processor context.
 *
 * @return exprNodeColumnDesc.
 */
public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) throws SemanticException {
  // We recursively create the exprNodeDesc. Base cases: when we encounter a
  // column ref, we convert that into an exprNodeColumnDesc; when we encounter
  // a constant, we convert that into an exprNodeConstantDesc. For others we
  // just build the exprNodeFuncDesc with recursively built children.
  ASTNode expr = (ASTNode) nd;
  TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
  // having key in (select .. where a = min(b.value))
  if (!ctx.isUseCaching() && ctx.getOuterRR() == null) {
    return null;
  }
  RowResolver input = ctx.getInputRR();
  ExprNodeDesc desc = null;
  if ((ctx == null) || (input == null) || (!ctx.getAllowGBExprElimination())) {
    return null;
  }
  // If the current subExpression is pre-calculated, as in Group-By etc.
  ColumnInfo colInfo = input.getExpression(expr);
  // Try the outer row resolver.
  RowResolver outerRR = ctx.getOuterRR();
  if (colInfo == null && outerRR != null) {
    colInfo = outerRR.getExpression(expr);
  }
  if (colInfo != null) {
    desc = new ExprNodeColumnDesc(colInfo);
    ASTNode source = input.getExpressionSource(expr);
    if (source != null && ctx.getUnparseTranslator() != null) {
      ctx.getUnparseTranslator().addCopyTranslation(expr, source);
    }
    return desc;
  }
  return desc;
}
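The substitution itself is just the new ExprNodeColumnDesc(colInfo) call on a pre-computed column. The following self-contained sketch shows only that step, outside any row-resolver lookup; the internal column name "_col0", the int type, and the class name are made-up values for illustration.

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public final class GByExprSubstitutionSketch {
  public static void main(String[] args) {
    // Pretend the group-by key a+b was already computed and registered in the
    // row resolver under the internal name "_col0".
    ColumnInfo colInfo = new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, null, false);
    // processGByExpr would return this column reference instead of rebuilding a+b.
    ExprNodeColumnDesc desc = new ExprNodeColumnDesc(colInfo);
    System.out.println(desc.getExprString());
  }
}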
Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
The class Vectorizer, method validateExprNodeDescRecursive.
private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressionTitle,
    VectorExpressionDescriptor.Mode mode, boolean allowComplex) {
  if (desc instanceof ExprNodeColumnDesc) {
    ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
    String columnName = c.getColumn();
    if (availableVectorizedVirtualColumnSet != null) {
      // For Map, check for virtual columns.
      VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName);
      if (virtualColumn != null) {
        if (!availableVectorizedVirtualColumnSet.contains(virtualColumn)) {
          setExpressionIssue(expressionTitle, "Virtual column " + columnName + " is not supported");
          return false;
        }
        // Remember we used this one in the query.
        neededVirtualColumnSet.add(virtualColumn);
      }
    }
  }
  String typeName = desc.getTypeInfo().getTypeName();
  boolean ret = validateDataType(typeName, mode, allowComplex && isVectorizationComplexTypesEnabled);
  if (!ret) {
    setExpressionIssue(expressionTitle,
        getValidateDataTypeErrorMsg(typeName, mode, allowComplex, isVectorizationComplexTypesEnabled));
    return false;
  }
  boolean isInExpression = false;
  if (desc instanceof ExprNodeGenericFuncDesc) {
    ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
    boolean r = validateGenericUdf(d);
    if (!r) {
      setExpressionIssue(expressionTitle, "UDF " + d + " not supported");
      return false;
    }
    GenericUDF genericUDF = d.getGenericUDF();
    isInExpression = (genericUDF instanceof GenericUDFIn);
  }
  if (desc.getChildren() != null) {
    if (isInExpression && desc.getChildren().get(0).getTypeInfo().getCategory() == Category.STRUCT) {
      // Always use loose FILTER mode.
      if (!validateStructInExpression(desc, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) {
        return false;
      }
    } else {
      for (ExprNodeDesc d : desc.getChildren()) {
        // Always use loose FILTER mode.
        if (!validateExprNodeDescRecursive(d, expressionTitle,
            VectorExpressionDescriptor.Mode.FILTER, /* allowComplex */ true)) {
          return false;
        }
      }
    }
  }
  return true;
}
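Structurally, the validation above is a pre-order walk of the expression tree in which ExprNodeColumnDesc is one of the leaf cases. The stripped-down sketch below is not Hive code; it walks the same ExprNodeDesc structure and merely collects the referenced column names, to make the recursion pattern explicit.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

public final class ExprColumnCollectorSketch {

  /** Recursively collects the column names referenced by an expression tree. */
  public static void collectColumns(ExprNodeDesc desc, List<String> out) {
    if (desc == null) {
      return;
    }
    if (desc instanceof ExprNodeColumnDesc) {
      out.add(((ExprNodeColumnDesc) desc).getColumn());
    }
    if (desc.getChildren() != null) {
      for (ExprNodeDesc child : desc.getChildren()) {
        collectColumns(child, out);
      }
    }
  }

  public static List<String> collectColumns(ExprNodeDesc desc) {
    List<String> out = new ArrayList<>();
    collectColumns(desc, out);
    return out;
  }
}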
Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
The class PartitionPruner, method removeNonPartCols.
/**
 * See compactExpr. Some things in the expr are replaced with nulls for the pruner;
 * however, the virtual columns are not removed (ExprNodeColumnDesc cannot tell them
 * apart from partition columns), so we do it here. The expression is only used to
 * prune by partition name, so we have no business with VCs.
 * @param expr original partition pruning expression.
 * @param partCols list of partition columns for the table.
 * @param referred partition columns referred by expr.
 * @return partition pruning expression that only contains partition columns from the list.
 */
private static ExprNodeDesc removeNonPartCols(ExprNodeDesc expr, List<String> partCols, Set<String> referred) {
  if (expr instanceof ExprNodeFieldDesc) {
    // List or struct fields.
    return new ExprNodeConstantDesc(expr.getTypeInfo(), null);
  } else if (expr instanceof ExprNodeColumnDesc) {
    String column = ((ExprNodeColumnDesc) expr).getColumn();
    if (!partCols.contains(column)) {
      // Column doesn't appear to be a partition column for the table.
      return new ExprNodeConstantDesc(expr.getTypeInfo(), null);
    }
    referred.add(column);
  } else if (expr instanceof ExprNodeGenericFuncDesc) {
    List<ExprNodeDesc> children = expr.getChildren();
    for (int i = 0; i < children.size(); ++i) {
      ExprNodeDesc other = removeNonPartCols(children.get(i), partCols, referred);
      if (ExprNodeDescUtils.isNullConstant(other)) {
        if (FunctionRegistry.isOpAnd(expr)) {
          // partcol=... AND nonpartcol=... is replaced with partcol=... AND TRUE,
          // which will be folded to partcol=...
          // This cannot also be done for OR.
          Preconditions.checkArgument(expr.getTypeInfo().accept(TypeInfoFactory.booleanTypeInfo));
          other = new ExprNodeConstantDesc(expr.getTypeInfo(), true);
        } else {
          // For any other function, evaluating over the null stand-in could yield a
          // definite value, change the predicate's result,
          // and cause overaggressive pruning, missing data (incorrect result).
          return new ExprNodeConstantDesc(expr.getTypeInfo(), null);
        }
      }
      children.set(i, other);
    }
  }
  return expr;
}
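Two small building blocks drive the rewriting above: the null constant used as a stand-in for removed non-partition sub-expressions, and the isNullConstant check the parent uses to decide between folding to TRUE (under AND) and propagating the null. A minimal sketch of just those pieces; the class name is made up for illustration.

import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public final class NullStandInSketch {
  public static void main(String[] args) {
    // The stand-in removeNonPartCols produces for a non-partition sub-expression.
    ExprNodeDesc standIn = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, null);
    // The check the parent uses to detect such a stand-in among its children (prints true).
    System.out.println(ExprNodeDescUtils.isNullConstant(standIn));
    // Under an AND parent, the stand-in is rewritten to a TRUE constant of the same type.
    ExprNodeDesc folded = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, true);
    System.out.println(folded.getExprString());
  }
}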
Use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
The class DropPartitionHandler, method genPartSpecs.
private Map<Integer, List<ExprNodeGenericFuncDesc>> genPartSpecs(Table table,
    List<Map<String, String>> partitions) throws SemanticException {
  Map<Integer, List<ExprNodeGenericFuncDesc>> partSpecs = new HashMap<>();
  int partPrefixLength = 0;
  if (partitions.size() > 0) {
    // Pick the length of the first ptn; we expect all ptns listed to have the
    // same number of key-vals.
    partPrefixLength = partitions.get(0).size();
  }
  List<ExprNodeGenericFuncDesc> partitionDesc = new ArrayList<>();
  for (Map<String, String> ptn : partitions) {
    // Convert each key-value map to the appropriate expression.
    ExprNodeGenericFuncDesc expr = null;
    for (Map.Entry<String, String> kvp : ptn.entrySet()) {
      String key = kvp.getKey();
      Object val = kvp.getValue();
      String type = table.getPartColByName(key).getType();
      PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo(type);
      ExprNodeColumnDesc column = new ExprNodeColumnDesc(pti, key, null, true);
      ExprNodeGenericFuncDesc op = DDLSemanticAnalyzer.makeBinaryPredicate("=", column,
          new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, val));
      expr = (expr == null) ? op : DDLSemanticAnalyzer.makeBinaryPredicate("and", expr, op);
    }
    if (expr != null) {
      partitionDesc.add(expr);
    }
  }
  if (partitionDesc.size() > 0) {
    partSpecs.put(partPrefixLength, partitionDesc);
  }
  return partSpecs;
}