Use of org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode in project flink by apache.
From the class HiveParserCalcitePlanner, method genFilterLogicalPlan.
private RelNode genFilterLogicalPlan(HiveParserQB qb, RelNode srcRel, Map<String, Integer> outerNameToPosMap, HiveParserRowResolver outerRR) throws SemanticException {
RelNode filterRel = null;
Iterator<HiveParserASTNode> whereClauseIterator = qb.getParseInfo().getDestToWhereExpr().values().iterator();
if (whereClauseIterator.hasNext()) {
filterRel = genFilterRelNode(qb, (HiveParserASTNode) whereClauseIterator.next().getChild(0), srcRel, outerNameToPosMap, outerRR, false);
}
return filterRel;
}
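For context: genFilterRelNode (not shown here) converts the predicate AST found under the TOK_WHERE node into a RexNode and places a filter on top of the input relation. The following is a minimal, self-contained Calcite sketch of that plan shape built with RelBuilder; it is illustrative only (the schema and predicate are invented) and is not the Flink code path.
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.tools.FrameworkConfig;
import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.tools.RelBuilder;

public class FilterPlanSketch {
    public static void main(String[] args) {
        SchemaPlus rootSchema = Frameworks.createRootSchema(true);
        FrameworkConfig config = Frameworks.newConfigBuilder().defaultSchema(rootSchema).build();
        RelBuilder builder = RelBuilder.create(config);
        // VALUES (1, 'a'), (2, 'b') with a filter id > 1 on top, i.e. LogicalFilter(LogicalValues)
        RelNode plan = builder
                .values(new String[] {"id", "name"}, 1, "a", 2, "b")
                .filter(builder.call(SqlStdOperatorTable.GREATER_THAN,
                        builder.field("id"), builder.literal(1)))
                .build();
        System.out.println(RelOptUtil.toString(plan));
    }
}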
Use of org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode in project flink by apache.
From the class HiveParserCalcitePlanner, method genLateralViewPlan.
private RelNode genLateralViewPlan(HiveParserQB qb, Map<String, RelNode> aliasToRel) throws SemanticException {
Map<String, ArrayList<HiveParserASTNode>> aliasToLateralViews = qb.getParseInfo().getAliasToLateralViews();
Preconditions.checkArgument(aliasToLateralViews.size() == 1, "We only support lateral views for 1 alias");
Map.Entry<String, ArrayList<HiveParserASTNode>> entry = aliasToLateralViews.entrySet().iterator().next();
String alias = entry.getKey();
RelNode res = null;
List<HiveParserASTNode> lateralViews = entry.getValue();
for (HiveParserASTNode lateralView : lateralViews) {
Preconditions.checkArgument(lateralView.getChildCount() == 2);
final boolean isOuter = lateralView.getType() == HiveASTParser.TOK_LATERAL_VIEW_OUTER;
// this is the 1st lateral view
if (res == null) {
// LHS can be table or sub-query
res = aliasToRel.get(alias);
}
Preconditions.checkState(res != null, "Failed to decide LHS table for current lateral view");
HiveParserRowResolver inputRR = relToRowResolver.get(res);
HiveParserUtils.LateralViewInfo info = HiveParserUtils.extractLateralViewInfo(lateralView, inputRR, semanticAnalyzer, frameworkConfig, cluster);
HiveParserRexNodeConverter rexNodeConverter = new HiveParserRexNodeConverter(cluster, res.getRowType(), relToHiveColNameCalcitePosMap.get(res), 0, false, funcConverter);
List<RexNode> operands = new ArrayList<>(info.getOperands().size());
for (ExprNodeDesc exprDesc : info.getOperands()) {
operands.add(rexNodeConverter.convert(exprDesc).accept(funcConverter));
}
res = genUDTFPlan(info.getSqlOperator(), info.getFuncName(), info.getTabAlias(), info.getColAliases(), qb, operands, info.getOperandColInfos(), res, false, isOuter);
}
return res;
}
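The loop above stacks one UDTF RelNode per LATERAL VIEW, feeding each result into the next iteration. An illustrative Hive query of the shape this method plans is spelled out below; the table and column names are hypothetical and not taken from Flink's tests.
public class LateralViewExample {
    public static void main(String[] args) {
        // Two chained lateral views over alias t; the second is OUTER, matching the
        // TOK_LATERAL_VIEW_OUTER check above. Table and column names are made up.
        String sql = "SELECT t.id, a.x, b.y "
                + "FROM src t "
                + "LATERAL VIEW explode(t.arr1) a AS x "
                + "LATERAL VIEW OUTER explode(t.arr2) b AS y";
        System.out.println(sql);
    }
}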
Use of org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode in project flink by apache.
From the class HiveParserCalcitePlanner, method genGBLogicalPlan.
// Generate GB plan.
private RelNode genGBLogicalPlan(HiveParserQB qb, RelNode srcRel) throws SemanticException {
RelNode gbRel = null;
HiveParserQBParseInfo qbp = qb.getParseInfo();
// 1. Gather GB Expressions (AST) (GB + Aggregations)
// NOTE: Multi Insert is not supported
String detsClauseName = qbp.getClauseNames().iterator().next();
HiveParserASTNode selExprList = qb.getParseInfo().getSelForClause(detsClauseName);
HiveParserSubQueryUtils.checkForTopLevelSubqueries(selExprList);
if (selExprList.getToken().getType() == HiveASTParser.TOK_SELECTDI && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) {
HiveParserASTNode node = (HiveParserASTNode) selExprList.getChild(0).getChild(0);
if (node.getToken().getType() == HiveASTParser.TOK_ALLCOLREF) {
srcRel = genSelectLogicalPlan(qb, srcRel, srcRel, null, null);
HiveParserRowResolver rr = relToRowResolver.get(srcRel);
qbp.setSelExprForClause(detsClauseName, HiveParserUtils.genSelectDIAST(rr));
}
}
// Select DISTINCT + windowing; GBy handled by genSelectForWindowing
if (selExprList.getToken().getType() == HiveASTParser.TOK_SELECTDI && !qb.getAllWindowingSpecs().isEmpty()) {
return null;
}
List<HiveParserASTNode> gbAstExprs = getGroupByForClause(qbp, detsClauseName);
HashMap<String, HiveParserASTNode> aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName);
boolean hasGrpByAstExprs = !gbAstExprs.isEmpty();
boolean hasAggregationTrees = aggregationTrees != null && !aggregationTrees.isEmpty();
final boolean cubeRollupGrpSetPresent = !qbp.getDestRollups().isEmpty() || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty();
// 2. Sanity check
if (semanticAnalyzer.getConf().getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) && qbp.getDistinctFuncExprsForClause(detsClauseName).size() > 1) {
throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.getMsg());
}
if (hasGrpByAstExprs || hasAggregationTrees) {
ArrayList<ExprNodeDesc> gbExprNodeDescs = new ArrayList<>();
ArrayList<String> outputColNames = new ArrayList<>();
// 3. Input, Output Row Resolvers
HiveParserRowResolver inputRR = relToRowResolver.get(srcRel);
HiveParserRowResolver outputRR = new HiveParserRowResolver();
outputRR.setIsExprResolver(true);
if (hasGrpByAstExprs) {
// 4. Construct GB Keys (ExprNode)
for (HiveParserASTNode gbAstExpr : gbAstExprs) {
Map<HiveParserASTNode, ExprNodeDesc> astToExprNodeDesc = semanticAnalyzer.genAllExprNodeDesc(gbAstExpr, inputRR);
ExprNodeDesc grpbyExprNDesc = astToExprNodeDesc.get(gbAstExpr);
if (grpbyExprNDesc == null) {
throw new SemanticException("Invalid Column Reference: " + gbAstExpr.dump());
}
addToGBExpr(outputRR, inputRR, gbAstExpr, grpbyExprNDesc, gbExprNodeDescs, outputColNames);
}
}
// 5. GroupingSets, Cube, Rollup
int numGroupCols = gbExprNodeDescs.size();
List<Integer> groupingSets = null;
if (cubeRollupGrpSetPresent) {
if (qbp.getDestRollups().contains(detsClauseName)) {
groupingSets = getGroupingSetsForRollup(gbAstExprs.size());
} else if (qbp.getDestCubes().contains(detsClauseName)) {
groupingSets = getGroupingSetsForCube(gbAstExprs.size());
} else if (qbp.getDestGroupingSets().contains(detsClauseName)) {
groupingSets = getGroupingSets(gbAstExprs, qbp, detsClauseName);
}
}
// 6. Construct aggregation function Info
ArrayList<AggInfo> aggInfos = new ArrayList<>();
if (hasAggregationTrees) {
for (HiveParserASTNode value : aggregationTrees.values()) {
// 6.1 Determine type of UDAF
// This is the GenericUDAF name
String aggName = unescapeIdentifier(value.getChild(0).getText());
boolean isDistinct = value.getType() == HiveASTParser.TOK_FUNCTIONDI;
boolean isAllColumns = value.getType() == HiveASTParser.TOK_FUNCTIONSTAR;
// 6.2 Convert UDAF Params to ExprNodeDesc
ArrayList<ExprNodeDesc> aggParameters = new ArrayList<>();
for (int i = 1; i < value.getChildCount(); i++) {
HiveParserASTNode paraExpr = (HiveParserASTNode) value.getChild(i);
ExprNodeDesc paraExprNode = semanticAnalyzer.genExprNodeDesc(paraExpr, inputRR);
aggParameters.add(paraExprNode);
}
GenericUDAFEvaluator.Mode aggMode = HiveParserUtils.groupByDescModeToUDAFMode(GroupByDesc.Mode.COMPLETE, isDistinct);
GenericUDAFEvaluator genericUDAFEvaluator = HiveParserUtils.getGenericUDAFEvaluator(aggName, aggParameters, value, isDistinct, isAllColumns, frameworkConfig.getOperatorTable());
assert (genericUDAFEvaluator != null);
HiveParserBaseSemanticAnalyzer.GenericUDAFInfo udaf = HiveParserUtils.getGenericUDAFInfo(genericUDAFEvaluator, aggMode, aggParameters);
String aggAlias = null;
if (value.getParent().getType() == HiveASTParser.TOK_SELEXPR && value.getParent().getChildCount() == 2) {
aggAlias = unescapeIdentifier(value.getParent().getChild(1).getText().toLowerCase());
}
AggInfo aggInfo = new AggInfo(aggParameters, udaf.returnType, aggName, isDistinct, isAllColumns, aggAlias);
aggInfos.add(aggInfo);
String field = aggAlias == null ? getColumnInternalName(numGroupCols + aggInfos.size() - 1) : aggAlias;
outputColNames.add(field);
outputRR.putExpression(value, new ColumnInfo(field, aggInfo.getReturnType(), "", false));
}
}
// grouping sets
if (groupingSets != null && !groupingSets.isEmpty()) {
String field = getColumnInternalName(numGroupCols + aggInfos.size());
outputColNames.add(field);
// flink grouping_id's return type is bigint
outputRR.put(null, VirtualColumn.GROUPINGID.getName(), new ColumnInfo(field, TypeInfoFactory.longTypeInfo, null, true));
}
// 8. We create the group_by operator
gbRel = genGBRelNode(gbExprNodeDescs, aggInfos, groupingSets, srcRel);
relToHiveColNameCalcitePosMap.put(gbRel, buildHiveToCalciteColumnMap(outputRR));
relToRowResolver.put(gbRel, outputRR);
}
return gbRel;
}
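Step 5 turns ROLLUP/CUBE/GROUPING SETS into a list of integer bitmaps, one per grouping set. The standalone sketch below shows the encoding that getGroupingSetsForRollup and getGroupingSetsForCube presumably produce (bit j set meaning the j-th GROUP BY key participates in that grouping set); this mirrors Hive's SemanticAnalyzer and is an assumption, not code copied from Flink.
import java.util.ArrayList;
import java.util.List;

public class GroupingSetsSketch {
    // ROLLUP over n keys: the n+1 "prefix" sets {}, {k0}, {k0,k1}, ... encoded as (1<<i)-1.
    static List<Integer> rollupSets(int numKeys) {
        List<Integer> sets = new ArrayList<>();
        for (int i = 0; i <= numKeys; i++) {
            sets.add((1 << i) - 1);
        }
        return sets;
    }

    // CUBE over n keys: every subset of the keys, i.e. all bitmaps 0 .. 2^n - 1.
    static List<Integer> cubeSets(int numKeys) {
        List<Integer> sets = new ArrayList<>();
        for (int i = 0; i < (1 << numKeys); i++) {
            sets.add(i);
        }
        return sets;
    }

    public static void main(String[] args) {
        System.out.println("rollup(3): " + rollupSets(3)); // [0, 1, 3, 7]
        System.out.println("cube(2):   " + cubeSets(2));   // [0, 1, 2, 3]
    }
}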
Use of org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode in project flink by apache.
From the class HiveParserCalcitePlanner, method genSetOpLogicalPlan.
@SuppressWarnings("nls")
private RelNode genSetOpLogicalPlan(HiveParserQBExpr.Opcode opcode, String alias, String leftalias, RelNode leftRel, String rightalias, RelNode rightRel) throws SemanticException {
// 1. Get Row Resolvers, Column map for original left and right input of SetOp Rel
HiveParserRowResolver leftRR = relToRowResolver.get(leftRel);
HiveParserRowResolver rightRR = relToRowResolver.get(rightRel);
HashMap<String, ColumnInfo> leftMap = leftRR.getFieldMap(leftalias);
HashMap<String, ColumnInfo> rightMap = rightRR.getFieldMap(rightalias);
// 2. Validate that SetOp is feasible according to Hive (by using type info from RR)
if (leftMap.size() != rightMap.size()) {
throw new SemanticException("Schema of both sides of union should match.");
}
// 3. construct SetOp Output RR using original left & right Input
HiveParserRowResolver setOpOutRR = new HiveParserRowResolver();
Iterator<Map.Entry<String, ColumnInfo>> lIter = leftMap.entrySet().iterator();
Iterator<Map.Entry<String, ColumnInfo>> rIter = rightMap.entrySet().iterator();
while (lIter.hasNext()) {
Map.Entry<String, ColumnInfo> lEntry = lIter.next();
Map.Entry<String, ColumnInfo> rEntry = rIter.next();
ColumnInfo lInfo = lEntry.getValue();
ColumnInfo rInfo = rEntry.getValue();
String field = lEntry.getKey();
// try widening conversion, otherwise fail union
TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), rInfo.getType());
if (commonTypeInfo == null) {
HiveParserASTNode tabRef = getQB().getAliases().isEmpty() ? null : getQB().getParseInfo().getSrcForAlias(getQB().getAliases().get(0));
throw new SemanticException(generateErrorMessage(tabRef, "Schema of both sides of setop should match: Column " + field + " is of type " + lInfo.getType().getTypeName() + " on first table and type " + rInfo.getType().getTypeName() + " on second table"));
}
ColumnInfo setOpColInfo = new ColumnInfo(lInfo);
setOpColInfo.setType(commonTypeInfo);
setOpOutRR.put(alias, field, setOpColInfo);
}
// 4. Determine which columns require a cast on the left/right input (Calcite requires exact
// types on both sides of a SetOp)
boolean leftNeedsTypeCast = false;
boolean rightNeedsTypeCast = false;
List<RexNode> leftProjs = new ArrayList<>();
List<RexNode> rightProjs = new ArrayList<>();
List<RelDataTypeField> leftFields = leftRel.getRowType().getFieldList();
List<RelDataTypeField> rightFields = rightRel.getRowType().getFieldList();
for (int i = 0; i < leftFields.size(); i++) {
RelDataType leftFieldType = leftFields.get(i).getType();
RelDataType rightFieldType = rightFields.get(i).getType();
if (!leftFieldType.equals(rightFieldType)) {
RelDataType unionFieldType = HiveParserUtils.toRelDataType(setOpOutRR.getColumnInfos().get(i).getType(), cluster.getTypeFactory());
if (!unionFieldType.equals(leftFieldType)) {
leftNeedsTypeCast = true;
}
leftProjs.add(cluster.getRexBuilder().ensureType(unionFieldType, cluster.getRexBuilder().makeInputRef(leftFieldType, i), true));
if (!unionFieldType.equals(rightFieldType)) {
rightNeedsTypeCast = true;
}
rightProjs.add(cluster.getRexBuilder().ensureType(unionFieldType, cluster.getRexBuilder().makeInputRef(rightFieldType, i), true));
} else {
leftProjs.add(cluster.getRexBuilder().ensureType(leftFieldType, cluster.getRexBuilder().makeInputRef(leftFieldType, i), true));
rightProjs.add(cluster.getRexBuilder().ensureType(rightFieldType, cluster.getRexBuilder().makeInputRef(rightFieldType, i), true));
}
}
// 5. Introduce a Project on top of the original left/right inputs if a cast is needed for type parity
if (leftNeedsTypeCast) {
leftRel = LogicalProject.create(leftRel, Collections.emptyList(), leftProjs, leftRel.getRowType().getFieldNames());
}
if (rightNeedsTypeCast) {
rightRel = LogicalProject.create(rightRel, Collections.emptyList(), rightProjs, rightRel.getRowType().getFieldNames());
}
// 6. Construct SetOp Rel
List<RelNode> leftAndRight = Arrays.asList(leftRel, rightRel);
SetOp setOpRel;
switch(opcode) {
case UNION:
setOpRel = LogicalUnion.create(leftAndRight, true);
break;
case INTERSECT:
setOpRel = LogicalIntersect.create(leftAndRight, false);
break;
case INTERSECTALL:
setOpRel = LogicalIntersect.create(leftAndRight, true);
break;
case EXCEPT:
setOpRel = LogicalMinus.create(leftAndRight, false);
break;
case EXCEPTALL:
setOpRel = LogicalMinus.create(leftAndRight, true);
break;
default:
throw new SemanticException("Unsupported set operator " + opcode.toString());
}
relToRowResolver.put(setOpRel, setOpOutRR);
relToHiveColNameCalcitePosMap.put(setOpRel, buildHiveToCalciteColumnMap(setOpOutRR));
return setOpRel;
}
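The feasibility check in step 3 depends on Hive's widening rules: FunctionRegistry.getCommonClassForUnionAll returns the common type both sides can be widened to, or null when no such type exists, which is what raises the SemanticException above. A minimal standalone check, assuming hive-exec is on the classpath:
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnionTypeWideningDemo {
    public static void main(String[] args) {
        // int and bigint widen to a common type for UNION ALL; for incompatible
        // pairs the method returns null and the planner rejects the set op.
        TypeInfo common = FunctionRegistry.getCommonClassForUnionAll(
                TypeInfoFactory.intTypeInfo, TypeInfoFactory.longTypeInfo);
        System.out.println(common); // expected: bigint
    }
}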
Use of org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode in project flink by apache.
From the class HiveParserCalcitePlanner, method genOBLogicalPlan.
private Pair<Sort, RelNode> genOBLogicalPlan(HiveParserQB qb, RelNode srcRel, boolean outermostOB) throws SemanticException {
Sort sortRel = null;
RelNode originalOBInput = null;
HiveParserQBParseInfo qbp = qb.getParseInfo();
String dest = qbp.getClauseNames().iterator().next();
HiveParserASTNode obAST = qbp.getOrderByForClause(dest);
if (obAST != null) {
// 1. OB Expr sanity test
// in strict mode, in the presence of order by, limit must be specified
Integer limit = qb.getParseInfo().getDestLimit(dest);
if (limit == null) {
String mapRedMode = semanticAnalyzer.getConf().getVar(HiveConf.ConfVars.HIVEMAPREDMODE);
boolean banLargeQuery = Boolean.parseBoolean(semanticAnalyzer.getConf().get("hive.strict.checks.large.query", "false"));
if ("strict".equalsIgnoreCase(mapRedMode) || banLargeQuery) {
throw new SemanticException(generateErrorMessage(obAST, "Order by-s without limit"));
}
}
// 2. Walk through OB exprs and extract field collations and additional
// virtual columns needed
final List<RexNode> virtualCols = new ArrayList<>();
final List<RelFieldCollation> fieldCollations = new ArrayList<>();
int fieldIndex;
List<Node> obASTExprLst = obAST.getChildren();
HiveParserASTNode obASTExpr;
HiveParserASTNode nullOrderASTExpr;
List<Pair<HiveParserASTNode, TypeInfo>> vcASTAndType = new ArrayList<>();
HiveParserRowResolver inputRR = relToRowResolver.get(srcRel);
HiveParserRowResolver outputRR = new HiveParserRowResolver();
HiveParserRexNodeConverter converter = new HiveParserRexNodeConverter(cluster, srcRel.getRowType(), relToHiveColNameCalcitePosMap.get(srcRel), 0, false, funcConverter);
int numSrcFields = srcRel.getRowType().getFieldCount();
for (Node node : obASTExprLst) {
// 2.1 Convert AST Expr to ExprNode
obASTExpr = (HiveParserASTNode) node;
nullOrderASTExpr = (HiveParserASTNode) obASTExpr.getChild(0);
HiveParserASTNode ref = (HiveParserASTNode) nullOrderASTExpr.getChild(0);
Map<HiveParserASTNode, ExprNodeDesc> astToExprNodeDesc = semanticAnalyzer.genAllExprNodeDesc(ref, inputRR);
ExprNodeDesc obExprNodeDesc = astToExprNodeDesc.get(ref);
if (obExprNodeDesc == null) {
throw new SemanticException("Invalid order by expression: " + obASTExpr.toString());
}
// 2.2 Convert ExprNode to RexNode
RexNode rexNode = converter.convert(obExprNodeDesc).accept(funcConverter);
// 2.3 Determine the index of the order-by expr in the child schema; Calcite cannot take
// compound exprs in order by unless they are present in the child (& hence we add a child Project Rel)
if (rexNode instanceof RexInputRef) {
fieldIndex = ((RexInputRef) rexNode).getIndex();
} else {
fieldIndex = numSrcFields + virtualCols.size();
virtualCols.add(rexNode);
vcASTAndType.add(new Pair<>(ref, obExprNodeDesc.getTypeInfo()));
}
// 2.4 Determine the Direction of order by
RelFieldCollation.Direction direction = RelFieldCollation.Direction.DESCENDING;
if (obASTExpr.getType() == HiveASTParser.TOK_TABSORTCOLNAMEASC) {
direction = RelFieldCollation.Direction.ASCENDING;
}
RelFieldCollation.NullDirection nullOrder;
if (nullOrderASTExpr.getType() == HiveASTParser.TOK_NULLS_FIRST) {
nullOrder = RelFieldCollation.NullDirection.FIRST;
} else if (nullOrderASTExpr.getType() == HiveASTParser.TOK_NULLS_LAST) {
nullOrder = RelFieldCollation.NullDirection.LAST;
} else {
throw new SemanticException("Unexpected null ordering option: " + nullOrderASTExpr.getType());
}
// 2.5 Add to field collations
fieldCollations.add(new RelFieldCollation(fieldIndex, direction, nullOrder));
}
// 3. Add Child Project Rel if needed, Generate Output RR, input Sel Rel
// for top constraining Sel
RelNode obInputRel = srcRel;
if (!virtualCols.isEmpty()) {
List<RexNode> originalInputRefs = srcRel.getRowType().getFieldList().stream().map(input -> new RexInputRef(input.getIndex(), input.getType())).collect(Collectors.toList());
HiveParserRowResolver obSyntheticProjectRR = new HiveParserRowResolver();
if (!HiveParserRowResolver.add(obSyntheticProjectRR, inputRR)) {
throw new SemanticException("Duplicates detected when adding columns to RR: see previous message");
}
int vcolPos = inputRR.getRowSchema().getSignature().size();
for (Pair<HiveParserASTNode, TypeInfo> astTypePair : vcASTAndType) {
obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo(getColumnInternalName(vcolPos), astTypePair.getValue(), null, false));
vcolPos++;
}
obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, virtualCols), obSyntheticProjectRR, srcRel);
if (outermostOB) {
if (!HiveParserRowResolver.add(outputRR, inputRR)) {
throw new SemanticException("Duplicates detected when adding columns to RR: see previous message");
}
} else {
if (!HiveParserRowResolver.add(outputRR, obSyntheticProjectRR)) {
throw new SemanticException("Duplicates detected when adding columns to RR: see previous message");
}
}
originalOBInput = srcRel;
} else {
if (!HiveParserRowResolver.add(outputRR, inputRR)) {
throw new SemanticException("Duplicates detected when adding columns to RR: see previous message");
}
}
// 4. Construct SortRel
RelTraitSet traitSet = cluster.traitSet();
RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations));
sortRel = LogicalSort.create(obInputRel, canonizedCollation, null, null);
// 5. Update the maps
Map<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR);
relToRowResolver.put(sortRel, outputRR);
relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
}
return (new Pair<>(sortRel, originalOBInput));
}
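Step 4 builds the sort purely from RelFieldCollations, one per ORDER BY expression, each carrying the field index, direction, and null ordering chosen in step 2. Below is a minimal, self-contained Calcite sketch (not Flink code) of the same construction over an invented input relation.
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollations;
import org.apache.calcite.rel.RelFieldCollation;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.logical.LogicalSort;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.tools.FrameworkConfig;
import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.tools.RelBuilder;

public class OrderByPlanSketch {
    public static void main(String[] args) {
        SchemaPlus rootSchema = Frameworks.createRootSchema(true);
        FrameworkConfig config = Frameworks.newConfigBuilder().defaultSchema(rootSchema).build();
        RelBuilder builder = RelBuilder.create(config);
        RelNode input = builder
                .values(new String[] {"id", "name"}, 2, "b", 1, "a")
                .build();
        // ORDER BY id ASC NULLS FIRST expressed as a single field collation on column 0.
        RelCollation collation = RelCollations.of(
                new RelFieldCollation(0,
                        RelFieldCollation.Direction.ASCENDING,
                        RelFieldCollation.NullDirection.FIRST));
        RelNode sorted = LogicalSort.create(input, collation, null, null);
        System.out.println(RelOptUtil.toString(sorted));
    }
}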