Use of org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc in project hive by apache.
From the class ColumnPrunerProcCtx, the method getNestedColsFromExprNodeDesc:
private static void getNestedColsFromExprNodeDesc(ExprNodeDesc desc, FieldNode pathToRoot, List<FieldNode> paths) {
  if (desc instanceof ExprNodeColumnDesc) {
    ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) desc;
    FieldNode p = new FieldNode(columnDesc.getColumn());
    checkListAndMap(columnDesc, pathToRoot, p);
    paths.add(p);
  } else if (desc instanceof ExprNodeFieldDesc) {
    ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) desc;
    ExprNodeDesc childDesc = fieldDesc.getDesc();
    FieldNode p = new FieldNode(fieldDesc.getFieldName());
    checkListAndMap(fieldDesc, pathToRoot, p);
    getNestedColsFromExprNodeDesc(childDesc, p, paths);
  } else {
    List<ExprNodeDesc> children = desc.getChildren();
    if (children != null) {
      for (ExprNodeDesc c : children) {
        getNestedColsFromExprNodeDesc(c, pathToRoot, paths);
      }
    }
  }
}
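For orientation, here is a minimal sketch of the kind of expression tree this method walks. The table alias "t", column "s", and field names are hypothetical; only the public Hive constructors that appear elsewhere on this page are used.

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

import java.util.Arrays;

public class NestedFieldExprSketch {
  // Builds the ExprNodeDesc tree for a reference like t.s.name, where s is a
  // struct<name:string,id:int> column. Walking this tree, the method above
  // recurses from the ExprNodeFieldDesc ("name") down to the ExprNodeColumnDesc
  // ("s"), producing a FieldNode path rooted at "s".
  public static ExprNodeDesc buildStructFieldRef() {
    TypeInfo structType = TypeInfoFactory.getStructTypeInfo(
        Arrays.asList("name", "id"),
        Arrays.asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo));
    // Column reference: alias "t", column "s", not a partition or virtual column.
    ExprNodeColumnDesc col = new ExprNodeColumnDesc(structType, "s", "t", false);
    // Field access s.name; the final flag marks access through a list element.
    return new ExprNodeFieldDesc(TypeInfoFactory.stringTypeInfo, col, "name", false);
  }
}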
Use of org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc in project hive by apache.
From the class StatsUtils, the method getColStatisticsFromExpression:
/**
 * Get column statistics for an expression node.
 * @param conf
 *          - hive conf
 * @param parentStats
 *          - parent statistics
 * @param end
 *          - expression node
 * @return column statistics
 */
public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statistics parentStats,
    ExprNodeDesc end) {
  if (end == null) {
    return null;
  }
  String colName = null;
  String colType = null;
  double avgColSize = 0;
  long countDistincts = 0;
  long numNulls = 0;
  ObjectInspector oi = end.getWritableObjectInspector();
  long numRows = parentStats.getNumRows();
  if (end instanceof ExprNodeColumnDesc) {
    // column projection
    ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
    colName = encd.getColumn();
    if (encd.getIsPartitionColOrVirtualCol()) {
      ColStatistics colStats = parentStats.getColumnStatisticsFromColName(colName);
      if (colStats != null) {
        /* If statistics for the column already exist, use them. */
        return colStats.clone();
      }
      // virtual columns
      colType = encd.getTypeInfo().getTypeName();
      countDistincts = numRows;
    } else {
      // clone the column stats and return
      ColStatistics result = parentStats.getColumnStatisticsFromColName(colName);
      if (result != null) {
        return result.clone();
      }
      return null;
    }
  } else if (end instanceof ExprNodeConstantDesc) {
    return buildColStatForConstant(conf, numRows, (ExprNodeConstantDesc) end);
  } else if (end instanceof ExprNodeGenericFuncDesc) {
    ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end;
    colName = engfd.getName();
    colType = engfd.getTypeString();
    // If it is a widening cast, we do not change NDV, min, max
    if (isWideningCast(engfd) && engfd.getChildren().get(0) instanceof ExprNodeColumnDesc) {
      // cast on a single column
      ColStatistics stats = parentStats.getColumnStatisticsFromColName(engfd.getCols().get(0));
      if (stats != null) {
        ColStatistics newStats = stats.clone();
        newStats.setColumnName(colName);
        colType = colType.toLowerCase();
        newStats.setColumnType(colType);
        newStats.setAvgColLen(getAvgColLenOf(conf, oi, colType));
        return newStats;
      }
    }
    if (conf.getBoolVar(ConfVars.HIVE_STATS_ESTIMATORS_ENABLE)) {
      Optional<StatEstimatorProvider> sep = engfd.getGenericUDF().adapt(StatEstimatorProvider.class);
      if (sep.isPresent()) {
        StatEstimator se = sep.get().getStatEstimator();
        List<ColStatistics> csList = new ArrayList<ColStatistics>();
        for (ExprNodeDesc child : engfd.getChildren()) {
          ColStatistics cs = getColStatisticsFromExpression(conf, parentStats, child);
          if (cs == null) {
            break;
          }
          csList.add(cs);
        }
        // only estimate if statistics were found for every child
        if (csList.size() == engfd.getChildren().size()) {
          Optional<ColStatistics> res = se.estimate(csList);
          if (res.isPresent()) {
            ColStatistics newStats = res.get();
            colType = colType.toLowerCase();
            newStats.setColumnType(colType);
            newStats.setColumnName(colName);
            return newStats;
          }
        }
      }
    }
    // fall back to the default NDV estimate
    countDistincts = getNDVFor(engfd, numRows, parentStats);
  } else if (end instanceof ExprNodeColumnListDesc) {
    // column list
    ExprNodeColumnListDesc encd = (ExprNodeColumnListDesc) end;
    colName = Joiner.on(",").join(encd.getCols());
    colType = serdeConstants.LIST_TYPE_NAME;
    countDistincts = numRows;
  } else if (end instanceof ExprNodeFieldDesc) {
    // field within a complex type
    ExprNodeFieldDesc enfd = (ExprNodeFieldDesc) end;
    colName = enfd.getFieldName();
    colType = enfd.getTypeString();
    countDistincts = numRows;
  } else if (end instanceof ExprDynamicParamDesc) {
    // not possible to create a colstats object for a dynamic parameter
    return null;
  } else {
    throw new IllegalArgumentException("not supported expr type " + end.getClass());
  }
  colType = colType.toLowerCase();
  avgColSize = getAvgColLenOf(conf, oi, colType);
  ColStatistics colStats = new ColStatistics(colName, colType);
  colStats.setAvgColLen(avgColSize);
  colStats.setCountDistint(countDistincts);
  colStats.setNumNulls(numNulls);
  return colStats;
}
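As a hedged illustration of the ExprNodeFieldDesc fallback path above (column name and type copied from the field descriptor, NDV set to the parent row count), the fragment below builds the same kind of ColStatistics by hand; the field name, type, row count, and average width are made up.

import org.apache.hadoop.hive.ql.plan.ColStatistics;

public class FieldColStatsSketch {
  public static ColStatistics fallbackFieldStats() {
    long numRows = 1000L; // assumed parent row count
    // Keyed off the field name and type, mirroring the ExprNodeFieldDesc branch.
    ColStatistics cs = new ColStatistics("name", "string");
    cs.setAvgColLen(8.0);        // illustrative average width in bytes
    cs.setCountDistint(numRows); // fallback NDV: every row distinct (the Hive setter really is spelled this way)
    cs.setNumNulls(0L);
    return cs;
  }
}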
Use of org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc in project hive by apache.
From the class TestExecDriver, the method populateMapRedPlan3:
/**
 * Test reduce with multiple tagged inputs.
 */
@SuppressWarnings("unchecked")
private void populateMapRedPlan3(Table src, Table src2) throws SemanticException {
  List<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < 2; i++) {
    outputColumns.add("_col" + i);
  }
  // map-side work
  Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx,
      PlanUtils.getReduceSinkDesc(Utilities.makeList(getStringColumn("key")),
          Utilities.makeList(getStringColumn("value")), outputColumns, true,
          Byte.valueOf((byte) 0), 1, -1, AcidUtils.Operation.NOT_ACID, NullOrdering.NULLS_LAST));
  addMapWork(mr, src, "a", op1);
  Operator<ReduceSinkDesc> op2 = OperatorFactory.get(ctx,
      PlanUtils.getReduceSinkDesc(Utilities.makeList(getStringColumn("key")),
          Utilities.makeList(getStringColumn("key")), outputColumns, true,
          Byte.valueOf((byte) 1), Integer.MAX_VALUE, -1, AcidUtils.Operation.NOT_ACID,
          NullOrdering.NULLS_LAST));
  addMapWork(mr, src2, "b", op2);
  ReduceWork rWork = new ReduceWork();
  rWork.setNumReduceTasks(Integer.valueOf(1));
  rWork.setNeedsTagging(true);
  rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
  rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
  mr.setReduceWork(rWork);
  rWork.getTagToValueDesc().add(op2.getConf().getValueSerializeInfo());
  // reduce-side work
  Operator<FileSinkDesc> op4 = OperatorFactory.get(ctx, new FileSinkDesc(
      new Path(TMPDIR + File.separator + "mapredplan3.out"), Utilities.defaultTd, false));
  Operator<SelectDesc> op5 = OperatorFactory.get(new SelectDesc(
      Utilities.makeList(new ExprNodeFieldDesc(TypeInfoFactory.stringTypeInfo,
          new ExprNodeColumnDesc(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo),
              Utilities.ReduceField.VALUE.toString(), "", false), "0", false)),
      Utilities.makeList(outputColumns.get(0))), op4);
  rWork.setReducer(op5);
}
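The ExprNodeFieldDesc of interest here is the SELECT expression: it pulls field "0" out of the reduce-side VALUE column, which arrives typed as list<string>. Restated on its own below as a sketch, not a drop-in test; the class and method names are illustrative.

import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReduceValueFieldSketch {
  public static ExprNodeFieldDesc firstValueField() {
    // The reduce-side VALUE column, carrying the tagged values as list<string>.
    ExprNodeColumnDesc value = new ExprNodeColumnDesc(
        TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo),
        Utilities.ReduceField.VALUE.toString(), "", false);
    // Select field "0" from it; the result is a single string column.
    return new ExprNodeFieldDesc(TypeInfoFactory.stringTypeInfo, value, "0", false);
  }
}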
Use of org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc in project hive by apache.
From the class SemanticAnalyzer, the method genAllExprNodeDesc:
/**
 * Generates the expression node descriptors for the given expression and all of
 * its children. This function uses the row resolver and the metadata information
 * that are passed as arguments to resolve the column names to internal names.
 *
 * @param expr
 *          the expression
 * @param input
 *          the row resolver
 * @param tcCtx
 *          customized type-checking context
 * @return expression to exprNodeDesc mapping
 * @throws SemanticException Failed to evaluate expression
 */
@SuppressWarnings("nls")
Map<ASTNode, ExprNodeDesc> genAllExprNodeDesc(ASTNode expr, RowResolver input, TypeCheckCtx tcCtx)
    throws SemanticException {
  // Create the walker and the rules dispatcher.
  tcCtx.setUnparseTranslator(unparseTranslator);
  Map<ASTNode, ExprNodeDesc> nodeOutputs = ExprNodeTypeCheck.genExprNode(expr, tcCtx);
  ExprNodeDesc desc = nodeOutputs.get(expr);
  if (desc == null) {
    String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText());
    ColumnInfo colInfo = input.get(null, tableOrCol);
    String errMsg;
    if (colInfo == null && input.getIsExprResolver()) {
      errMsg = ASTErrorUtils.getMsg(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY.getMsg(), expr);
    } else {
      errMsg = tcCtx.getError();
    }
    throw new SemanticException(Optional.ofNullable(errMsg).orElse("Error in parsing "));
  }
  if (desc instanceof ExprNodeColumnListDesc) {
    throw new SemanticException("TOK_ALLCOLREF is not supported in current context");
  }
  if (!unparseTranslator.isEnabled()) {
    // Not creating a view, so no need to track view expansions.
    return nodeOutputs;
  }
  List<ASTNode> fieldDescList = new ArrayList<>();
  for (Map.Entry<ASTNode, ExprNodeDesc> entry : nodeOutputs.entrySet()) {
    if (!(entry.getValue() instanceof ExprNodeColumnDesc)) {
      // collect struct field accesses (ExprNodeFieldDesc) for a later translation pass
      if (entry.getValue() instanceof ExprNodeFieldDesc) {
        fieldDescList.add(entry.getKey());
      }
      continue;
    }
    ASTNode node = entry.getKey();
    ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) entry.getValue();
    if ((columnDesc.getTabAlias() == null) || (columnDesc.getTabAlias().length() == 0)) {
      // internal expressions used in the representation of aggregation
      continue;
    }
    String[] tmp = input.reverseLookup(columnDesc.getColumn());
    // if an outer row resolver is present and the lookup is null or carries a
    // different table alias, look the column up in the outer row resolver instead
    if (tcCtx.getOuterRR() != null && (tmp == null
        || (tmp[0] != null && columnDesc.getTabAlias() != null
            && !tmp[0].equals(columnDesc.getTabAlias())))) {
      tmp = tcCtx.getOuterRR().reverseLookup(columnDesc.getColumn());
    }
    StringBuilder replacementText = new StringBuilder();
    replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
    replacementText.append(".");
    replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
    unparseTranslator.addTranslation(node, replacementText.toString());
  }
  for (ASTNode node : fieldDescList) {
    Map<ASTNode, String> map = translateFieldDesc(node);
    for (Entry<ASTNode, String> entry : map.entrySet()) {
      unparseTranslator.addTranslation(entry.getKey(), entry.getValue().toLowerCase());
    }
  }
  return nodeOutputs;
}
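The view-expansion bookkeeping above boils down to partitioning the type checker's output map: column references are translated immediately, while struct field accesses (ExprNodeFieldDesc) are queued in fieldDescList for a second pass. Below is a stream-based restatement of that filtering step under the same types; the class and method names are illustrative, not Hive code.

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;

public class FieldDescFilterSketch {
  // Keep only the AST nodes whose resolved descriptor is a struct field access.
  public static List<ASTNode> collectFieldDescNodes(Map<ASTNode, ExprNodeDesc> nodeOutputs) {
    return nodeOutputs.entrySet().stream()
        .filter(e -> e.getValue() instanceof ExprNodeFieldDesc)
        .map(Map.Entry::getKey)
        .collect(Collectors.toList());
  }
}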