use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class SemanticAnalyzer method genGroupByPlanReduceSinkOperator2MR.
/**
* Generate the second ReduceSinkOperator for the Group By Plan
* (parseInfo.getXXX(dest)). The new ReduceSinkOperator will be a child of
* groupByOperatorInfo.
*
* The second ReduceSinkOperator will put the group by keys in the map-reduce
* sort key, and put the partial aggregation results in the map-reduce value.
*
* @param numPartitionFields
* the number of fields in the map-reduce partition key. This should
* always be the same as the number of Group By keys. We should be
* able to remove this parameter since in this phase there is no
* distinct any more.
* @return the new ReduceSinkOperator.
* @throws SemanticException
*/
@SuppressWarnings("nls")
private Operator genGroupByPlanReduceSinkOperator2MR(QBParseInfo parseInfo, String dest,
    Operator groupByOperatorInfo, int numPartitionFields, int numReducers,
    boolean groupingSetsPresent) throws SemanticException {
  RowResolver reduceSinkInputRowResolver2 = opParseCtx.get(groupByOperatorInfo).getRowResolver();
  RowResolver reduceSinkOutputRowResolver2 = new RowResolver();
  reduceSinkOutputRowResolver2.setIsExprResolver(true);
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  // Get group-by keys and store in reduceKeys
  List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
  for (int i = 0; i < grpByExprs.size(); ++i) {
    ASTNode grpbyExpr = grpByExprs.get(i);
    String field = getColumnInternalName(i);
    outputColumnNames.add(field);
    TypeInfo typeInfo = reduceSinkInputRowResolver2.getExpression(grpbyExpr).getType();
    ExprNodeColumnDesc inputExpr = new ExprNodeColumnDesc(typeInfo, field, "", false);
    reduceKeys.add(inputExpr);
    ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + field,
        typeInfo, "", false);
    reduceSinkOutputRowResolver2.putExpression(grpbyExpr, colInfo);
    colExprMap.put(colInfo.getInternalName(), inputExpr);
  }
  // add a key for reduce sink
  if (groupingSetsPresent) {
    // Note that partitioning fields don't need to change, since it is either
    // partitioned randomly, or by all grouping keys + distinct keys
    processGroupingSetReduceSinkOperator(reduceSinkInputRowResolver2,
        reduceSinkOutputRowResolver2, reduceKeys, outputColumnNames, colExprMap);
  }
  // Get partial aggregation results and store in reduceValues
  ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
  int inputField = reduceKeys.size();
  HashMap<String, ASTNode> aggregationTrees = parseInfo.getAggregationExprsForClause(dest);
  for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
    String field = getColumnInternalName(inputField);
    ASTNode t = entry.getValue();
    TypeInfo typeInfo = reduceSinkInputRowResolver2.getExpression(t).getType();
    ExprNodeColumnDesc exprDesc = new ExprNodeColumnDesc(typeInfo, field, "", false);
    reduceValues.add(exprDesc);
    inputField++;
    String col = getColumnInternalName(reduceValues.size() - 1);
    outputColumnNames.add(col);
    reduceSinkOutputRowResolver2.putExpression(t,
        new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + col, typeInfo, "", false));
    colExprMap.put(col, exprDesc);
  }
  ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
      OperatorFactory.getAndMakeChild(
          PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, outputColumnNames, true, -1,
              numPartitionFields, numReducers, AcidUtils.Operation.NOT_ACID),
          new RowSchema(reduceSinkOutputRowResolver2.getColumnInfos()), groupByOperatorInfo),
      reduceSinkOutputRowResolver2);
  rsOp.setColumnExprMap(colExprMap);
  return rsOp;
}
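As a side note, the key-building loop above follows a small, reusable pattern: each group-by key gets an ExprNodeColumnDesc that reads the incoming column, a ColumnInfo whose internal name is qualified with the reduce-side KEY alias, and an entry in the column-expression map tying the two together. A minimal standalone sketch of that pattern (assuming hive-exec is on the classpath) is shown below; the class name, the literal internal name _col0, and the string type are illustrative choices, not taken from the Hive source.

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class DemoReduceKeyColumns {

  public static void main(String[] args) {
    Map<String, ExprNodeDesc> colExprMap = new HashMap<>();

    // Pretend the first group-by key is a string column with internal name _col0.
    TypeInfo typeInfo = TypeInfoFactory.stringTypeInfo;
    String field = "_col0";

    // Expression that reads the incoming column (no table alias, not virtual).
    ExprNodeColumnDesc inputExpr = new ExprNodeColumnDesc(typeInfo, field, "", false);

    // Reduce-sink output column: the internal name is qualified with the KEY alias,
    // as the loop in genGroupByPlanReduceSinkOperator2MR does.
    ColumnInfo keyCol = new ColumnInfo(
        Utilities.ReduceField.KEY.toString() + "." + field, typeInfo, "", false);

    // The column-expression map lets downstream consumers trace the output
    // column back to the expression that produces it.
    colExprMap.put(keyCol.getInternalName(), inputExpr);

    System.out.println(keyCol.getInternalName() + " <- " + inputExpr.getExprString());
  }
}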
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class BaseSemanticAnalyzer method validateCheckConstraint.
public static void validateCheckConstraint(List<FieldSchema> cols,
    List<SQLCheckConstraint> checkConstraints, Configuration conf) throws SemanticException {
  // create colinfo and then row resolver
  RowResolver rr = new RowResolver();
  for (FieldSchema col : cols) {
    ColumnInfo ci = new ColumnInfo(col.getName(),
        TypeInfoUtils.getTypeInfoFromTypeString(col.getType()), null, false);
    rr.put(null, col.getName(), ci);
  }
  TypeCheckCtx typeCheckCtx = new TypeCheckCtx(rr);
  // TypeCheckProcFactory expects typeCheckCtx to have an unparse translator
  UnparseTranslator unparseTranslator = new UnparseTranslator(conf);
  typeCheckCtx.setUnparseTranslator(unparseTranslator);
  for (SQLCheckConstraint cc : checkConstraints) {
    try {
      ParseDriver parseDriver = new ParseDriver();
      ASTNode checkExprAST = parseDriver.parseExpression(cc.getCheck_expression());
      validateCheckExprAST(checkExprAST);
      Map<ASTNode, ExprNodeDesc> genExprs = TypeCheckProcFactory.genExprNode(checkExprAST, typeCheckCtx);
      ExprNodeDesc checkExpr = genExprs.get(checkExprAST);
      if (checkExpr == null) {
        throw new SemanticException(
            ErrorMsg.INVALID_CSTR_SYNTAX.getMsg("Invalid type for CHECK constraint: ")
                + cc.getCheck_expression());
      }
      if (checkExpr.getTypeInfo().getTypeName() != serdeConstants.BOOLEAN_TYPE_NAME) {
        throw new SemanticException(
            ErrorMsg.INVALID_CSTR_SYNTAX.getMsg("Only boolean type is supported for CHECK constraint: ")
                + cc.getCheck_expression() + ". Found: " + checkExpr.getTypeInfo().getTypeName());
      }
      validateCheckExpr(checkExpr);
    } catch (Exception e) {
      throw new SemanticException(
          ErrorMsg.INVALID_CSTR_SYNTAX.getMsg("Invalid CHECK constraint expression: ")
              + cc.getCheck_expression() + ". " + e.getMessage());
    }
  }
}
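The first half of validateCheckConstraint is itself a compact recipe for building a RowResolver out of metastore FieldSchema entries: one ColumnInfo per column, typed via TypeInfoUtils.getTypeInfoFromTypeString and registered under a null table alias. A minimal sketch of just that step follows, with made-up column names and types standing in for a real table definition.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class DemoCheckConstraintResolver {

  public static void main(String[] args) throws Exception {
    // Illustrative column list; in validateCheckConstraint these come from the table definition.
    List<FieldSchema> cols = Arrays.asList(
        new FieldSchema("id", "bigint", null),
        new FieldSchema("price", "decimal(10,2)", null));

    // Same construction as above: one ColumnInfo per column, typed from the
    // metastore type string, registered under a null table alias.
    RowResolver rr = new RowResolver();
    for (FieldSchema col : cols) {
      ColumnInfo ci = new ColumnInfo(col.getName(),
          TypeInfoUtils.getTypeInfoFromTypeString(col.getType()), null, false);
      rr.put(null, col.getName(), ci);
    }

    for (ColumnInfo ci : rr.getColumnInfos()) {
      System.out.println(ci.getInternalName() + " : " + ci.getType().getTypeName());
    }
  }
}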
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class CalcitePlanner method handleInsertStatement.
// This function serves as the wrapper of handleInsertStatementSpec in SemanticAnalyzer
Operator<?> handleInsertStatement(String dest, Operator<?> input, RowResolver inputRR, QB qb)
    throws SemanticException {
  ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
  ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
  for (int i = 0; i < columns.size(); i++) {
    ColumnInfo col = columns.get(i);
    colList.add(new ExprNodeColumnDesc(col));
  }
  ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
  RowResolver out_rwsch = handleInsertStatementSpec(colList, dest, inputRR, inputRR, qb, selExprList);
  ArrayList<String> columnNames = new ArrayList<String>();
  Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
  for (int i = 0; i < colList.size(); i++) {
    String outputCol = getColumnInternalName(i);
    colExprMap.put(outputCol, colList.get(i));
    columnNames.add(outputCol);
  }
  Operator<?> output = putOpInsertMap(
      OperatorFactory.getAndMakeChild(new SelectDesc(colList, columnNames),
          new RowSchema(out_rwsch.getColumnInfos()), input),
      out_rwsch);
  output.setColumnExprMap(colExprMap);
  return output;
}
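The loop above leans on the ExprNodeColumnDesc(ColumnInfo) constructor to turn every column the input RowResolver exposes into a pass-through select expression, then names the outputs positionally. A small sketch of that wiring follows; the two hard-coded input columns and the literal "_col" + i naming are illustrative stand-ins for what getColumnInternalName produces in the analyzer.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class DemoSelectFromColumnInfos {

  public static void main(String[] args) {
    // Illustrative input schema; handleInsertStatement takes these from the input RowResolver.
    List<ColumnInfo> columns = new ArrayList<>();
    columns.add(new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, "t", false));
    columns.add(new ColumnInfo("_col1", TypeInfoFactory.stringTypeInfo, "t", false));

    // One pass-through column expression per input column, as in the loop above.
    List<ExprNodeDesc> colList = new ArrayList<>();
    for (ColumnInfo col : columns) {
      colList.add(new ExprNodeColumnDesc(col));
    }

    // Output columns get positional internal names; "_col" + i mirrors what
    // getColumnInternalName(i) returns in the analyzer.
    Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
    List<String> columnNames = new ArrayList<>();
    for (int i = 0; i < colList.size(); i++) {
      String outputCol = "_col" + i;
      colExprMap.put(outputCol, colList.get(i));
      columnNames.add(outputCol);
    }

    System.out.println(columnNames + " -> " + colExprMap);
  }
}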
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class TypeCheckProcFactory method processGByExpr.
/**
* Function to do groupby subexpression elimination. This is called by all the
* processors initially. As an example, consider the query select a+b,
* count(1) from T group by a+b; Then a+b is already precomputed in the group
* by operator's key, so we substitute a+b in the select list with the internal
* column name of the a+b expression that appears in the input row
* resolver.
*
* @param nd
* The node that is being inspected.
* @param procCtx
* The processor context.
*
* @return exprNodeColumnDesc.
*/
public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) throws SemanticException {
  // We recursively create the exprNodeDesc. Base cases: when we encounter a column ref,
  // we convert that into an exprNodeColumnDesc; when we encounter a constant, we convert
  // that into an exprNodeConstantDesc. For others we just build the exprNodeFuncDesc
  // with recursively built children.
  ASTNode expr = (ASTNode) nd;
  TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
  // having key in (select .. where a = min(b.value)
  if (!ctx.isUseCaching() && ctx.getOuterRR() == null) {
    return null;
  }
  RowResolver input = ctx.getInputRR();
  ExprNodeDesc desc = null;
  if ((ctx == null) || (input == null) || (!ctx.getAllowGBExprElimination())) {
    return null;
  }
  // If the current subExpression is pre-calculated, as in Group-By etc.
  ColumnInfo colInfo = input.getExpression(expr);
  // try outer row resolver
  RowResolver outerRR = ctx.getOuterRR();
  if (colInfo == null && outerRR != null) {
    colInfo = outerRR.getExpression(expr);
  }
  if (colInfo != null) {
    desc = new ExprNodeColumnDesc(colInfo);
    ASTNode source = input.getExpressionSource(expr);
    if (source != null && ctx.getUnparseTranslator() != null) {
      ctx.getUnparseTranslator().addCopyTranslation(expr, source);
    }
    return desc;
  }
  return desc;
}
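The elimination hinges on a RowResolver round trip: the group-by plan registers each key expression with putExpression, and processGByExpr later resolves the same expression with getExpression instead of re-evaluating it. The sketch below walks through that round trip on the a+b example from the javadoc; the KEY._col0 internal name and the int type are assumptions made for illustration, and it assumes the resolver matches a re-parsed expression of the same text, which is what the lookup above relies on.

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class DemoGByExprElimination {

  public static void main(String[] args) throws Exception {
    ParseDriver pd = new ParseDriver();

    // The group-by operator has already computed a + b and exposed it as KEY._col0
    // (both the internal name and the type are illustrative).
    ASTNode groupByKey = pd.parseExpression("a + b");
    RowResolver groupByOutput = new RowResolver();
    groupByOutput.setIsExprResolver(true);
    groupByOutput.putExpression(groupByKey,
        new ColumnInfo("KEY._col0", TypeInfoFactory.intTypeInfo, "", false));

    // Later, the select list mentions a + b again; instead of re-evaluating it,
    // the type checker resolves it to the precomputed column, as processGByExpr does.
    ASTNode selectExpr = pd.parseExpression("a + b");
    ColumnInfo precomputed = groupByOutput.getExpression(selectExpr);
    if (precomputed != null) {
      System.out.println(new ExprNodeColumnDesc(precomputed).getExprString());
    }
  }
}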
use of org.apache.hadoop.hive.ql.exec.ColumnInfo in project hive by apache.
the class HiveGBOpConvUtil method getValueKeysForRS.
/**
* Get Value Keys for RS following MapSide GB
*
* @param inOp
* the map-side GB operator
* @param outputKeyColumnNames
* @param colExprMap
* @return List of ExprNodeDesc of Values
* @throws SemanticException
*/
private static ArrayList<ExprNodeDesc> getValueKeysForRS(Operator inOp, int aggStartPos,
    List<String> outputKeyColumnNames, ArrayList<ColumnInfo> colInfoLst,
    Map<String, ExprNodeDesc> colExprMap, boolean addEmptyTabAlias, boolean setColToNonVirtual)
    throws SemanticException {
  List<ColumnInfo> mapGBColInfoLst = inOp.getSchema().getSignature();
  ArrayList<ExprNodeDesc> valueKeys = null;
  if (aggStartPos >= mapGBColInfoLst.size()) {
    valueKeys = new ArrayList<ExprNodeDesc>();
  } else {
    valueKeys = ExprNodeDescUtils.genExprNodeDesc(inOp, aggStartPos, mapGBColInfoLst.size() - 1,
        true, setColToNonVirtual);
    for (int i = 0; i < valueKeys.size(); ++i) {
      String outputColName = SemanticAnalyzer.getColumnInternalName(i);
      outputKeyColumnNames.add(outputColName);
      // TODO: Verify if this is needed (why can't it always be null/empty?)
      String tabAlias = addEmptyTabAlias ? "" : null;
      ColumnInfo colInfo = new ColumnInfo(
          Utilities.ReduceField.VALUE.toString() + "." + outputColName,
          valueKeys.get(i).getTypeInfo(), tabAlias, false);
      colInfoLst.add(colInfo);
      colExprMap.put(colInfo.getInternalName(), valueKeys.get(i));
    }
  }
  return valueKeys;
}
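Finally, the value-side loop in getValueKeysForRS mirrors the key-side pattern seen earlier: a positional internal name from SemanticAnalyzer.getColumnInternalName, a ColumnInfo qualified with the VALUE alias, and a colExprMap entry back to the forwarded expression. A minimal sketch, with two hand-built partial-aggregation columns standing in for the map-side GB output (the column names and types are illustrative):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class DemoReduceValueColumns {

  public static void main(String[] args) {
    // Pretend the map-side group-by emits two partial-aggregation columns.
    List<ExprNodeDesc> valueKeys = new ArrayList<>();
    valueKeys.add(new ExprNodeColumnDesc(TypeInfoFactory.longTypeInfo, "_col1", "", false));
    valueKeys.add(new ExprNodeColumnDesc(TypeInfoFactory.doubleTypeInfo, "_col2", "", false));

    List<String> outputKeyColumnNames = new ArrayList<>();
    List<ColumnInfo> colInfoLst = new ArrayList<>();
    Map<String, ExprNodeDesc> colExprMap = new HashMap<>();

    // Same wiring as the loop in getValueKeysForRS: positional internal name,
    // VALUE-prefixed ColumnInfo, and a colExprMap entry back to the source expression.
    for (int i = 0; i < valueKeys.size(); i++) {
      String outputColName = SemanticAnalyzer.getColumnInternalName(i);
      outputKeyColumnNames.add(outputColName);
      ColumnInfo colInfo = new ColumnInfo(
          Utilities.ReduceField.VALUE.toString() + "." + outputColName,
          valueKeys.get(i).getTypeInfo(), "", false);
      colInfoLst.add(colInfo);
      colExprMap.put(colInfo.getInternalName(), valueKeys.get(i));
    }

    System.out.println(outputKeyColumnNames + " / " + colExprMap.keySet());
  }
}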
Aggregations