use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
the class ColumnStatsAutoGatherContext, method replaceSelectOperatorProcess.
/**
 * @param operator : the select operator in the analyze statement
 * @param input : the operator right before FS in the insert overwrite statement
 * @throws HiveException
 */
private void replaceSelectOperatorProcess(SelectOperator operator, Operator<? extends OperatorDesc> input)
    throws HiveException {
  RowSchema selRS = operator.getSchema();
  ArrayList<ColumnInfo> signature = new ArrayList<>();
  OpParseContext inputCtx = sa.opParseCtx.get(input);
  RowResolver inputRR = inputCtx.getRowResolver();
  ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
  ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
  ArrayList<String> columnNames = new ArrayList<String>();
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
  // 1. deal with non-partition columns
  for (int i = 0; i < this.columns.size(); i++) {
    ColumnInfo col = columns.get(i);
    ExprNodeDesc exprNodeDesc = new ExprNodeColumnDesc(col);
    colList.add(exprNodeDesc);
    String internalName = selRS.getColumnNames().get(i);
    columnNames.add(internalName);
    columnExprMap.put(internalName, exprNodeDesc);
    signature.add(selRS.getSignature().get(i));
  }
  // if there is any partition column (in static partition or dynamic
  // partition or mixed case)
  int dynamicPartBegin = -1;
  for (int i = 0; i < partitionColumns.size(); i++) {
    ExprNodeDesc exprNodeDesc = null;
    String partColName = partitionColumns.get(i).getName();
    // 2. deal with static partition columns
    if (partSpec != null && partSpec.containsKey(partColName) && partSpec.get(partColName) != null) {
      if (dynamicPartBegin > 0) {
        throw new SemanticException("Dynamic partition columns should not come before static partition columns.");
      }
      exprNodeDesc = new ExprNodeConstantDesc(partSpec.get(partColName));
      TypeInfo srcType = exprNodeDesc.getTypeInfo();
      TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
      if (!srcType.equals(destType)) {
        // This may be possible when srcType is string but destType is integer
        exprNodeDesc = ParseUtils.createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
      }
    } else {
      // 3. dynamic partition columns
      dynamicPartBegin++;
      ColumnInfo col = columns.get(this.columns.size() + dynamicPartBegin);
      TypeInfo srcType = col.getType();
      TypeInfo destType = selRS.getSignature().get(this.columns.size() + i).getType();
      exprNodeDesc = new ExprNodeColumnDesc(col);
      if (!srcType.equals(destType)) {
        exprNodeDesc = ParseUtils.createConversionCast(exprNodeDesc, (PrimitiveTypeInfo) destType);
      }
    }
    colList.add(exprNodeDesc);
    String internalName = selRS.getColumnNames().get(this.columns.size() + i);
    columnNames.add(internalName);
    columnExprMap.put(internalName, exprNodeDesc);
    signature.add(selRS.getSignature().get(this.columns.size() + i));
  }
  operator.setConf(new SelectDesc(colList, columnNames));
  operator.setColumnExprMap(columnExprMap);
  selRS.setSignature(signature);
  operator.setSchema(selRS);
}
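For orientation, here is a minimal sketch (not from the Hive source) of the two expression shapes built above: a pass-through ExprNodeColumnDesc for a non-partition column, and a constant for a static partition value that gets a conversion cast when its type differs from the target column's. The internal name "_col0", the partition value, and the types are made-up examples; imports and the enclosing method (which would declare throws SemanticException) are omitted, matching the snippets on this page.

  ColumnInfo colInfo = new ColumnInfo("_col0", TypeInfoFactory.longTypeInfo, "", false);
  ExprNodeDesc nonPartCol = new ExprNodeColumnDesc(colInfo);            // non-partition column, passed through as-is

  ExprNodeDesc staticPart = new ExprNodeConstantDesc("20240101");       // static partition value arrives as a string
  PrimitiveTypeInfo destType = TypeInfoFactory.intTypeInfo;             // suppose the target partition column is int
  if (!staticPart.getTypeInfo().equals(destType)) {
    staticPart = ParseUtils.createConversionCast(staticPart, destType); // wrap in a conversion cast, as above
  }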
use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
the class IndexPredicateAnalyzer, method getColumnExpr.
// Check if ExprNodeColumnDesc is wrapped in expr.
// If so, peel off. Otherwise return itself.
private ExprNodeDesc getColumnExpr(ExprNodeDesc expr) {
  if (expr instanceof ExprNodeColumnDesc) {
    return expr;
  }
  ExprNodeGenericFuncDesc funcDesc = null;
  if (expr instanceof ExprNodeGenericFuncDesc) {
    funcDesc = (ExprNodeGenericFuncDesc) expr;
  }
  if (null == funcDesc) {
    return expr;
  }
  GenericUDF udf = funcDesc.getGenericUDF();
  // check if it is a simple cast expression.
  if ((udf instanceof GenericUDFBridge || udf instanceof GenericUDFToBinary || udf instanceof GenericUDFToChar
      || udf instanceof GenericUDFToVarchar || udf instanceof GenericUDFToDecimal || udf instanceof GenericUDFToDate
      || udf instanceof GenericUDFToUnixTimeStamp || udf instanceof GenericUDFToUtcTimestamp)
      && funcDesc.getChildren().size() == 1
      && funcDesc.getChildren().get(0) instanceof ExprNodeColumnDesc) {
    return expr.getChildren().get(0);
  }
  return expr;
}
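As a usage illustration (not part of the Hive source), the fragment below shows the case this helper targets: a column wrapped in a simple cast. The column name "price", the table alias "t", and the types are made-up values, and imports are omitted as in the snippets above.

  ExprNodeDesc col = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "price", "t", false);
  // An implicit numeric cast is represented as a GenericUDFBridge around UDFToDouble.
  GenericUDF toDouble = new GenericUDFBridge("UDFToDouble", false, UDFToDouble.class.getName());
  ExprNodeDesc wrapped = new ExprNodeGenericFuncDesc(TypeInfoFactory.doubleTypeInfo, toDouble,
      new ArrayList<ExprNodeDesc>(Arrays.asList(col)));
  // getColumnExpr(wrapped) peels the cast and returns the underlying ExprNodeColumnDesc;
  // getColumnExpr(col) returns the column unchanged, and any non-cast expression is also returned as-is.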
use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
the class IndexPredicateAnalyzer, method analyzeExpr.
private ExprNodeDesc analyzeExpr(ExprNodeGenericFuncDesc expr, List<IndexSearchCondition> searchConditions,
    Object... nodeOutputs) throws SemanticException {
  if (FunctionRegistry.isOpAnd(expr)) {
    assert (nodeOutputs.length >= 2);
    List<ExprNodeDesc> residuals = new ArrayList<ExprNodeDesc>();
    for (Object residual : nodeOutputs) {
      if (null != residual) {
        residuals.add((ExprNodeDesc) residual);
      }
    }
    if (residuals.size() == 0) {
      return null;
    } else if (residuals.size() == 1) {
      return residuals.get(0);
    } else if (residuals.size() > 1) {
      return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
          FunctionRegistry.getGenericUDFForAnd(), residuals);
    }
  }
  GenericUDF genericUDF = expr.getGenericUDF();
  if (!(genericUDF instanceof GenericUDFBaseCompare)) {
    return expr;
  }
  ExprNodeDesc expr1 = (ExprNodeDesc) nodeOutputs[0];
  ExprNodeDesc expr2 = (ExprNodeDesc) nodeOutputs[1];
  // We may need to peel off the GenericUDFBridge that is added by CBO or user
  if (expr1.getTypeInfo().equals(expr2.getTypeInfo())) {
    expr1 = getColumnExpr(expr1);
    expr2 = getColumnExpr(expr2);
  }
  ExprNodeDesc[] extracted = ExprNodeDescUtils.extractComparePair(expr1, expr2);
  if (extracted == null || (extracted.length > 2 && !acceptsFields)) {
    return expr;
  }
  ExprNodeColumnDesc columnDesc;
  ExprNodeConstantDesc constantDesc;
  if (extracted[0] instanceof ExprNodeConstantDesc) {
    genericUDF = genericUDF.flip();
    columnDesc = (ExprNodeColumnDesc) extracted[1];
    constantDesc = (ExprNodeConstantDesc) extracted[0];
  } else {
    columnDesc = (ExprNodeColumnDesc) extracted[0];
    constantDesc = (ExprNodeConstantDesc) extracted[1];
  }
  Set<String> allowed = columnToUDFs.get(columnDesc.getColumn());
  if (allowed == null) {
    return expr;
  }
  String udfName = genericUDF.getUdfName();
  if (!allowed.contains(genericUDF.getUdfName())) {
    return expr;
  }
  String[] fields = null;
  if (extracted.length > 2) {
    ExprNodeFieldDesc fieldDesc = (ExprNodeFieldDesc) extracted[2];
    if (!isValidField(fieldDesc)) {
      return expr;
    }
    fields = ExprNodeDescUtils.extractFields(fieldDesc);
  }
  // We also need to update the expr so that the index query can be generated.
  // Note that, hive does not support UDFToDouble etc in the query text.
  List<ExprNodeDesc> list = new ArrayList<ExprNodeDesc>();
  list.add(expr1);
  list.add(expr2);
  ExprNodeGenericFuncDesc indexExpr = new ExprNodeGenericFuncDesc(expr.getTypeInfo(), expr.getGenericUDF(), list);
  searchConditions.add(new IndexSearchCondition(columnDesc, udfName, constantDesc, indexExpr, expr, fields));
  // remove it from the residual predicate
  return fields == null ? null : expr;
}
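For context, a minimal sketch (not from the Hive source) of the kind of predicate this method consumes. The column name "key", its type, and the constant are made-up values, and imports are omitted as elsewhere on this page.

  // A predicate like "key > 5" arrives as a comparison UDF over a column and a constant.
  ExprNodeDesc keyCol = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "key", null, false);
  ExprNodeDesc five = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5);
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(keyCol);
  children.add(five);
  ExprNodeGenericFuncDesc gt = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
      new GenericUDFOPGreaterThan(), children);
  // analyzeExpr sees a GenericUDFBaseCompare, pulls the (column, constant) pair out with
  // extractComparePair (no flip() here, since the constant is the second operand), and, if the
  // comparison UDF is registered for "key" in columnToUDFs, records an IndexSearchCondition and
  // returns null so the predicate is dropped from the residual.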
use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
the class HiveGBOpConvUtil, method genReduceSideGB2.
private static OpAttr genReduceSideGB2(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
  ArrayList<String> outputColNames = new ArrayList<String>();
  ArrayList<ColumnInfo> colInfoLst = new ArrayList<ColumnInfo>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  String colOutputName = null;
  ReduceSinkOperator rs = (ReduceSinkOperator) inputOpAf.inputs.get(0);
  List<ColumnInfo> rsColInfoLst = rs.getSchema().getSignature();
  ColumnInfo ci;
  // 1. Build GB Keys, grouping set starting position
  // 1.1 First Add original GB Keys
  ArrayList<ExprNodeDesc> gbKeys = ExprNodeDescUtils.genExprNodeDesc(rs, 0, gbInfo.gbKeys.size() - 1, false, false);
  for (int i = 0; i < gbInfo.gbKeys.size(); i++) {
    ci = rsColInfoLst.get(i);
    colOutputName = gbInfo.outputColNames.get(i);
    outputColNames.add(colOutputName);
    colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false));
    colExprMap.put(colOutputName, gbKeys.get(i));
  }
  // 1.2 Add GrpSet Col
  int groupingSetsPosition = -1;
  if (inclGrpSetInReduceSide(gbInfo) && gbInfo.grpIdFunctionNeeded) {
    groupingSetsPosition = gbKeys.size();
    ExprNodeDesc grpSetColExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
        rsColInfoLst.get(groupingSetsPosition).getInternalName(), null, false);
    gbKeys.add(grpSetColExpr);
    colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1);
    outputColNames.add(colOutputName);
    colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true));
    colExprMap.put(colOutputName, grpSetColExpr);
  }
  // 2. Add UDAF
  UDAFAttrs udafAttr;
  ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
  int udafStartPosInGBInfOutputColNames = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() * 2;
  int udafStartPosInInputRS = gbInfo.grpSets.isEmpty() ? gbInfo.gbKeys.size() : gbInfo.gbKeys.size() + 1;
  for (int i = 0; i < gbInfo.udafAttrs.size(); i++) {
    udafAttr = gbInfo.udafAttrs.get(i);
    ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
    aggParameters.add(new ExprNodeColumnDesc(rsColInfoLst.get(udafStartPosInInputRS + i)));
    colOutputName = gbInfo.outputColNames.get(udafStartPosInGBInfOutputColNames + i);
    outputColNames.add(colOutputName);
    Mode udafMode = SemanticAnalyzer.groupByDescModeToUDAFMode(GroupByDesc.Mode.FINAL, udafAttr.isDistinctUDAF);
    GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(udafAttr.udafEvaluator, udafMode, aggParameters);
    aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator,
        udaf.convertedParameters, false, udafMode));
    colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false));
  }
  Operator rsGBOp2 = OperatorFactory.getAndMakeChild(
      new GroupByDesc(GroupByDesc.Mode.FINAL, outputColNames, gbKeys, aggregations, false,
          gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, null, false, groupingSetsPosition,
          gbInfo.containsDistinctAggr),
      new RowSchema(colInfoLst), rs);
  rsGBOp2.setColumnExprMap(colExprMap);
  // TODO: Shouldn't we propagate vc? is it vc col from tab or all vc
  return new OpAttr("", new HashSet<Integer>(), rsGBOp2);
}
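Side note (my reading, not stated in the source): the single-argument ExprNodeColumnDesc(ColumnInfo) used for the UDAF parameters above is essentially a shorthand for the explicit constructor used for the grouping-set column; both reference a ReduceSink output column by its internal name. A made-up example:

  ColumnInfo rsCol = new ColumnInfo("KEY._col0", TypeInfoFactory.stringTypeInfo, null, false);
  ExprNodeDesc viaColumnInfo = new ExprNodeColumnDesc(rsCol);
  ExprNodeDesc viaExplicitArgs = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "KEY._col0", null, false);
  // Both describe the same column reference; downstream operators resolve it through the
  // colExprMap / RowSchema built in genReduceSideGB2.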
use of org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc in project hive by apache.
the class HiveOpConverter, method genInputSelectForUnion.
private Operator<? extends OperatorDesc> genInputSelectForUnion(Operator<? extends OperatorDesc> origInputOp,
    ArrayList<ColumnInfo> uColumnInfo) throws SemanticException {
  Iterator<ColumnInfo> oIter = origInputOp.getSchema().getSignature().iterator();
  Iterator<ColumnInfo> uIter = uColumnInfo.iterator();
  List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>();
  List<String> colName = new ArrayList<String>();
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<String, ExprNodeDesc>();
  boolean needSelectOp = false;
  while (oIter.hasNext()) {
    ColumnInfo oInfo = oIter.next();
    ColumnInfo uInfo = uIter.next();
    if (!oInfo.isSameColumnForRR(uInfo)) {
      needSelectOp = true;
    }
    ExprNodeDesc column = new ExprNodeColumnDesc(oInfo.getType(), oInfo.getInternalName(), oInfo.getTabAlias(),
        oInfo.getIsVirtualCol(), oInfo.isSkewedCol());
    if (!oInfo.getType().equals(uInfo.getType())) {
      column = ParseUtils.createConversionCast(column, (PrimitiveTypeInfo) uInfo.getType());
    }
    columns.add(column);
    colName.add(uInfo.getInternalName());
    columnExprMap.put(uInfo.getInternalName(), column);
  }
  if (needSelectOp) {
    return OperatorFactory.getAndMakeChild(new SelectDesc(columns, colName), new RowSchema(uColumnInfo),
        columnExprMap, origInputOp);
  } else {
    return origInputOp;
  }
}
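A minimal sketch (not from the Hive source) of the per-column alignment performed in the loop above. The internal names, alias, and types are made-up, and the fragment assumes it runs inside a method declaring throws SemanticException, with imports omitted as elsewhere on this page.

  ColumnInfo branchCol = new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, "a", false);   // column from one union branch
  ColumnInfo unionCol = new ColumnInfo("_col0", TypeInfoFactory.longTypeInfo, null, false);  // corresponding union schema column
  ExprNodeDesc column = new ExprNodeColumnDesc(branchCol.getType(), branchCol.getInternalName(),
      branchCol.getTabAlias(), branchCol.getIsVirtualCol(), branchCol.isSkewedCol());
  if (!branchCol.getType().equals(unionCol.getType())) {
    // int vs bigint here, so a conversion cast is added and a SELECT operator becomes necessary
    column = ParseUtils.createConversionCast(column, (PrimitiveTypeInfo) unionCol.getType());
  }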