Use of org.apache.hadoop.hive.ql.parse.ASTNode in project hive by apache.
The class ASTConverter, method convert().
private ASTNode convert() throws CalciteSemanticException {
  /*
   * 1. Walk RelNode Graph; note from, where, gBy.. nodes.
   */
  new QBVisitor().go(root);
  /*
   * 2. convert from node.
   */
  QueryBlockInfo qb = convertSource(from);
  schema = qb.schema;
  hiveAST.from = ASTBuilder.construct(HiveParser.TOK_FROM, "TOK_FROM").add(qb.ast).node();
  /*
   * 3. convert filterNode
   */
  if (where != null) {
    ASTNode cond = where.getCondition().accept(
        new RexVisitor(schema, false, root.getCluster().getRexBuilder()));
    hiveAST.where = ASTBuilder.where(cond);
  }
  /*
   * 4. GBy
   */
  if (groupBy != null) {
    ASTBuilder b;
    boolean groupingSetsExpression = false;
    Group aggregateType = groupBy.getGroupType();
    switch (aggregateType) {
    case SIMPLE:
      b = ASTBuilder.construct(HiveParser.TOK_GROUPBY, "TOK_GROUPBY");
      break;
    case ROLLUP:
      b = ASTBuilder.construct(HiveParser.TOK_ROLLUP_GROUPBY, "TOK_ROLLUP_GROUPBY");
      break;
    case CUBE:
      b = ASTBuilder.construct(HiveParser.TOK_CUBE_GROUPBY, "TOK_CUBE_GROUPBY");
      break;
    case OTHER:
      b = ASTBuilder.construct(HiveParser.TOK_GROUPING_SETS, "TOK_GROUPING_SETS");
      groupingSetsExpression = true;
      break;
    default:
      throw new CalciteSemanticException("Group type not recognized");
    }
    HiveAggregate hiveAgg = (HiveAggregate) groupBy;
    for (int pos : hiveAgg.getAggregateColumnsOrder()) {
      RexInputRef iRef = new RexInputRef(groupBy.getGroupSet().nth(pos),
          groupBy.getCluster().getTypeFactory().createSqlType(SqlTypeName.ANY));
      b.add(iRef.accept(new RexVisitor(schema, false, root.getCluster().getRexBuilder())));
    }
    for (int pos = 0; pos < groupBy.getGroupCount(); pos++) {
      if (!hiveAgg.getAggregateColumnsOrder().contains(pos)) {
        RexInputRef iRef = new RexInputRef(groupBy.getGroupSet().nth(pos),
            groupBy.getCluster().getTypeFactory().createSqlType(SqlTypeName.ANY));
        b.add(iRef.accept(new RexVisitor(schema, false, root.getCluster().getRexBuilder())));
      }
    }
    // Grouping sets expressions
    if (groupingSetsExpression) {
      for (ImmutableBitSet groupSet : groupBy.getGroupSets()) {
        ASTBuilder expression = ASTBuilder.construct(
            HiveParser.TOK_GROUPING_SETS_EXPRESSION, "TOK_GROUPING_SETS_EXPRESSION");
        for (int i : groupSet) {
          RexInputRef iRef = new RexInputRef(i,
              groupBy.getCluster().getTypeFactory().createSqlType(SqlTypeName.ANY));
          expression.add(iRef.accept(new RexVisitor(schema, false, root.getCluster().getRexBuilder())));
        }
        b.add(expression);
      }
    }
    if (!groupBy.getGroupSet().isEmpty()) {
      hiveAST.groupBy = b.node();
    }
    schema = new Schema(schema, groupBy);
  }
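  // Illustrative sketch (not from the original source): for
  //   GROUP BY a, b GROUPING SETS ((a, b), (a))
  // the builder above yields roughly
  //   (TOK_GROUPING_SETS
  //     (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b)
  //     (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b))
  //     (TOK_GROUPING_SETS_EXPRESSION (TOK_TABLE_OR_COL a)))
  // i.e. all group keys first, then one TOK_GROUPING_SETS_EXPRESSION per set.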
  /*
   * 5. Having
   */
  if (having != null) {
    ASTNode cond = having.getCondition().accept(
        new RexVisitor(schema, false, root.getCluster().getRexBuilder()));
    hiveAST.having = ASTBuilder.having(cond);
  }
  /*
   * 6. Project
   */
  ASTBuilder b = ASTBuilder.construct(HiveParser.TOK_SELECT, "TOK_SELECT");
  if (select instanceof Project) {
    List<RexNode> childExps = ((Project) select).getChildExps();
    if (childExps.isEmpty()) {
      RexLiteral r = select.getCluster().getRexBuilder().makeExactLiteral(new BigDecimal(1));
      ASTNode selectExpr = ASTBuilder.selectExpr(ASTBuilder.literal(r), "1");
      b.add(selectExpr);
    } else {
      int i = 0;
      for (RexNode r : childExps) {
        ASTNode expr = r.accept(new RexVisitor(schema, r instanceof RexLiteral,
            select.getCluster().getRexBuilder()));
        String alias = select.getRowType().getFieldNames().get(i++);
        ASTNode selectExpr = ASTBuilder.selectExpr(expr, alias);
        b.add(selectExpr);
      }
    }
    hiveAST.select = b.node();
  } else {
    // select is UDTF
    HiveTableFunctionScan udtf = (HiveTableFunctionScan) select;
    List<ASTNode> children = new ArrayList<>();
    RexCall call = (RexCall) udtf.getCall();
    for (RexNode r : call.getOperands()) {
      ASTNode expr = r.accept(new RexVisitor(schema, r instanceof RexLiteral,
          select.getCluster().getRexBuilder()));
      children.add(expr);
    }
    ASTBuilder sel = ASTBuilder.construct(HiveParser.TOK_SELEXPR, "TOK_SELEXPR");
    ASTNode function = buildUDTFAST(call.getOperator().getName(), children);
    sel.add(function);
    for (String alias : udtf.getRowType().getFieldNames()) {
      sel.add(HiveParser.Identifier, alias);
    }
    b.add(sel);
    hiveAST.select = b.node();
  }
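  // Illustrative sketch (not from the original source): for a UDTF projection
  // such as SELECT explode(arr) AS c FROM t, the branch above builds roughly
  //   (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_TABLE_OR_COL arr)) c))
  // with one Identifier child per output field name, assuming buildUDTFAST
  // produces a function node named after the operator.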
  /*
   * 7. Order. The ORDER BY from the enclosing block is handled here. A RelNode
   * has no pointer to its parent, so we walk top down; but the OB at each
   * block really belongs to its src/from, hence the sort is passed down to
   * each block from its parent.
   * 8. Limit
   */
  convertOrderLimitToASTNode((HiveSortLimit) orderLimit);
  return hiveAST.getAST();
}
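For orientation, a hedged sketch of the result: assuming the usual TOK_QUERY/TOK_FROM/TOK_INSERT layout of a Hive parse tree, a block like SELECT a, sum(b) FROM t GROUP BY a would come back roughly as

  (TOK_QUERY
    (TOK_FROM (TOK_TABREF (TOK_TABNAME t)))
    (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE))
      (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a) a)
                  (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL b)) _c1))
      (TOK_GROUPBY (TOK_TABLE_OR_COL a))))

with the exact aliases and destination tokens depending on the Hive version; this is a hand-written illustration, not a dump from a real run.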
Use of org.apache.hadoop.hive.ql.parse.ASTNode in project hive by apache.
The class ASTConverter, method convertOrderLimitToASTNode().
private void convertOrderLimitToASTNode(HiveSortLimit order) {
  if (order != null) {
    HiveSortLimit hiveSortLimit = order;
    if (!hiveSortLimit.getCollation().getFieldCollations().isEmpty()) {
      // 1 Add order by token
      ASTNode orderAst = ASTBuilder.createAST(HiveParser.TOK_ORDERBY, "TOK_ORDERBY");
      schema = new Schema(hiveSortLimit);
      Map<Integer, RexNode> obRefToCallMap = hiveSortLimit.getInputRefToCallMap();
      RexNode obExpr;
      ASTNode astCol;
      for (RelFieldCollation c : hiveSortLimit.getCollation().getFieldCollations()) {
        // 2 Add Direction token
        ASTNode directionAST = c.getDirection() == RelFieldCollation.Direction.ASCENDING
            ? ASTBuilder.createAST(HiveParser.TOK_TABSORTCOLNAMEASC, "TOK_TABSORTCOLNAMEASC")
            : ASTBuilder.createAST(HiveParser.TOK_TABSORTCOLNAMEDESC, "TOK_TABSORTCOLNAMEDESC");
        ASTNode nullDirectionAST;
        // Null direction
        if (c.nullDirection == RelFieldCollation.NullDirection.FIRST) {
          nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST");
          directionAST.addChild(nullDirectionAST);
        } else if (c.nullDirection == RelFieldCollation.NullDirection.LAST) {
          nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST");
          directionAST.addChild(nullDirectionAST);
        } else {
          // Default
          if (c.getDirection() == RelFieldCollation.Direction.ASCENDING) {
            nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_FIRST, "TOK_NULLS_FIRST");
            directionAST.addChild(nullDirectionAST);
          } else {
            nullDirectionAST = ASTBuilder.createAST(HiveParser.TOK_NULLS_LAST, "TOK_NULLS_LAST");
            directionAST.addChild(nullDirectionAST);
          }
        }
        // 3 Convert OB expr (OB Expr is usually an input ref except for top
        // level OB; top level OB will have RexCall kept in a map.)
        obExpr = null;
        if (obRefToCallMap != null) {
          obExpr = obRefToCallMap.get(c.getFieldIndex());
        }
        if (obExpr != null) {
          astCol = obExpr.accept(new RexVisitor(schema, false, order.getCluster().getRexBuilder()));
        } else {
          ColumnInfo cI = schema.get(c.getFieldIndex());
          /*
           * The RowResolver setup for Select drops Table associations. So
           * setup ASTNode on unqualified name.
           */
          astCol = ASTBuilder.unqualifiedName(cI.column);
        }
        // 4 buildup the ob expr AST
        nullDirectionAST.addChild(astCol);
        orderAst.addChild(directionAST);
      }
      hiveAST.order = orderAst;
    }
    RexNode offsetExpr = hiveSortLimit.getOffsetExpr();
    RexNode fetchExpr = hiveSortLimit.getFetchExpr();
    if (fetchExpr != null) {
      Object offset = (offsetExpr == null) ? new Integer(0) : ((RexLiteral) offsetExpr).getValue2();
      Object fetch = ((RexLiteral) fetchExpr).getValue2();
      hiveAST.limit = ASTBuilder.limit(offset, fetch);
    }
  }
}
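To make the nesting concrete, a hand-written sketch (assuming ASTBuilder.limit emits a TOK_LIMIT node with offset and fetch children): ORDER BY x DESC NULLS LAST LIMIT 5 becomes roughly

  (TOK_ORDERBY (TOK_TABSORTCOLNAMEDESC (TOK_NULLS_LAST (TOK_TABLE_OR_COL x))))
  (TOK_LIMIT 0 5)

Note that the null-direction token is a child of the direction token and the column sits under the null-direction token, mirroring the addChild calls above.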
Use of org.apache.hadoop.hive.ql.parse.ASTNode in project hive by apache.
The class ExprNodeConverter, method visitOver().
@Override
public ExprNodeDesc visitOver(RexOver over) {
  if (!deep) {
    return null;
  }
  final RexWindow window = over.getWindow();
  final WindowSpec windowSpec = new WindowSpec();
  final PartitioningSpec partitioningSpec = getPSpec(window);
  windowSpec.setPartitioning(partitioningSpec);
  final WindowFrameSpec windowFrameSpec = getWindowRange(window);
  windowSpec.setWindowFrame(windowFrameSpec);
  WindowFunctionSpec wfs = new WindowFunctionSpec();
  wfs.setWindowSpec(windowSpec);
  final Schema schema = new Schema(tabAlias, inputRowType.getFieldList());
  final ASTNode wUDAFAst = new ASTConverter.RexVisitor(schema).visitOver(over);
  wfs.setExpression(wUDAFAst);
  ASTNode nameNode = (ASTNode) wUDAFAst.getChild(0);
  wfs.setName(nameNode.getText());
  for (int i = 1; i < wUDAFAst.getChildCount() - 1; i++) {
    ASTNode child = (ASTNode) wUDAFAst.getChild(i);
    wfs.addArg(child);
  }
  if (wUDAFAst.getText().equals("TOK_FUNCTIONSTAR")) {
    wfs.setStar(true);
  }
  String columnAlias = getWindowColumnAlias();
  wfs.setAlias(columnAlias);
  this.windowFunctionSpecs.add(wfs);
  return new ExprNodeColumnDesc(TypeConverter.convert(over.getType()), columnAlias, tabAlias, false);
}
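An illustrative reading of the code, not a dump from a real run: for a window call such as count(*) OVER (PARTITION BY a), the RexVisitor is expected to return a function AST of roughly the shape (TOK_FUNCTIONSTAR count ...), so setName reads child 0 for the function name, the loop copies the middle children as arguments while skipping the last child (presumably the window specification), and the TOK_FUNCTIONSTAR token text triggers setStar(true) for the star form.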
Use of org.apache.hadoop.hive.ql.parse.ASTNode in project hive by apache.
The class HiveGBOpConvUtil, method getGBInfo().
// This should be called separately for each of the GB ops in the logical GB;
// otherwise GB evaluators and expr nodes may get shared among multiple GB ops
private static GBInfo getGBInfo(HiveAggregate aggRel, OpAttr inputOpAf, HiveConf hc) throws SemanticException {
  GBInfo gbInfo = new GBInfo();
  // 0. Collect AggRel output col Names
  gbInfo.outputColNames.addAll(aggRel.getRowType().getFieldNames());
  // 1. Collect GB Keys
  RelNode aggInputRel = aggRel.getInput();
  ExprNodeConverter exprConv = new ExprNodeConverter(inputOpAf.tabAlias, aggInputRel.getRowType(),
      new HashSet<Integer>(), aggRel.getCluster().getTypeFactory(), true);
  ExprNodeDesc tmpExprNodeDesc;
  for (int i : aggRel.getGroupSet()) {
    RexInputRef iRef = new RexInputRef(i, aggInputRel.getRowType().getFieldList().get(i).getType());
    tmpExprNodeDesc = iRef.accept(exprConv);
    gbInfo.gbKeys.add(tmpExprNodeDesc);
    gbInfo.gbKeyColNamesInInput.add(aggInputRel.getRowType().getFieldNames().get(i));
    gbInfo.gbKeyTypes.add(tmpExprNodeDesc.getTypeInfo());
  }
  // 2. Collect Grouping Set info
  if (aggRel.getGroupType() != Group.SIMPLE) {
    // 2.1 Translate each grouping set col bitset into a bitmap
    ImmutableList<ImmutableBitSet> lstGrpSet = aggRel.getGroupSets();
    long bitmap = 0;
    for (ImmutableBitSet grpSet : lstGrpSet) {
      bitmap = 0;
      for (Integer bitIdx : grpSet.asList()) {
        bitmap = SemanticAnalyzer.setBit(bitmap, bitIdx);
      }
      gbInfo.grpSets.add(bitmap);
    }
    Collections.sort(gbInfo.grpSets);
    // 2.2 Check if the grouping sets require an additional MR job
    gbInfo.grpSetRqrAdditionalMRJob =
        gbInfo.grpSets.size() > hc.getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY);
    // 2.3 Check if GROUPING_ID needs to be projected out
    if (!aggRel.getAggCallList().isEmpty()
        && (aggRel.getAggCallList().get(aggRel.getAggCallList().size() - 1).getAggregation() == HiveGroupingID.INSTANCE)) {
      gbInfo.grpIdFunctionNeeded = true;
    }
  }
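  // Worked example (illustrative, assuming setBit turns on bit bitIdx, as the
  // name suggests): for GROUP BY a, b GROUPING SETS ((a, b), (a), ()) with a
  // at bit 0 and b at bit 1, the loop above produces bitmaps 3 (0b11),
  // 1 (0b01) and 0, so grpSets = [0, 1, 3] after sorting.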
  // 3. Walk through UDAF & Collect Distinct Info
  Set<Integer> distinctRefs = new HashSet<Integer>();
  Map<Integer, Integer> distParamInRefsToOutputPos = new HashMap<Integer, Integer>();
  for (AggregateCall aggCall : aggRel.getAggCallList()) {
    if ((aggCall.getAggregation() == HiveGroupingID.INSTANCE) || !aggCall.isDistinct()) {
      continue;
    }
    List<Integer> argLst = new ArrayList<Integer>(aggCall.getArgList());
    List<String> argNames = HiveCalciteUtil.getFieldNames(argLst, aggInputRel);
    ExprNodeDesc distinctExpr;
    for (int i = 0; i < argLst.size(); i++) {
      if (!distinctRefs.contains(argLst.get(i))) {
        distinctRefs.add(argLst.get(i));
        distinctExpr = HiveCalciteUtil.getExprNode(argLst.get(i), aggInputRel, exprConv);
        // Only distinct nodes that are NOT part of the key should be added to distExprNodes
        if (ExprNodeDescUtils.indexOf(distinctExpr, gbInfo.gbKeys) < 0) {
          distParamInRefsToOutputPos.put(argLst.get(i), gbInfo.distExprNodes.size());
          gbInfo.distExprNodes.add(distinctExpr);
          gbInfo.distExprNames.add(argNames.get(i));
          gbInfo.distExprTypes.add(distinctExpr.getTypeInfo());
        }
      }
    }
  }
  // 4. Walk through UDAF & Collect UDAF Info
  Set<Integer> deDupedNonDistIrefsSet = new HashSet<Integer>();
  for (AggregateCall aggCall : aggRel.getAggCallList()) {
    if (aggCall.getAggregation() == HiveGroupingID.INSTANCE) {
      continue;
    }
    UDAFAttrs udafAttrs = new UDAFAttrs();
    List<ExprNodeDesc> argExps = HiveCalciteUtil.getExprNodes(aggCall.getArgList(), aggInputRel,
        inputOpAf.tabAlias);
    udafAttrs.udafParams.addAll(argExps);
    udafAttrs.udafName = aggCall.getAggregation().getName();
    udafAttrs.argList = aggCall.getArgList();
    udafAttrs.isDistinctUDAF = aggCall.isDistinct();
    List<Integer> argLst = new ArrayList<Integer>(aggCall.getArgList());
    List<Integer> distColIndicesOfUDAF = new ArrayList<Integer>();
    List<Integer> distUDAFParamsIndxInDistExprs = new ArrayList<Integer>();
    for (int i = 0; i < argLst.size(); i++) {
      // NOTE: distinct expr can be part of the GB key
      if (udafAttrs.isDistinctUDAF) {
        ExprNodeDesc argExpr = argExps.get(i);
        Integer found = ExprNodeDescUtils.indexOf(argExpr, gbInfo.gbKeys);
        distColIndicesOfUDAF.add(found < 0
            ? distParamInRefsToOutputPos.get(argLst.get(i)) + gbInfo.gbKeys.size()
                + (gbInfo.grpSets.size() > 0 ? 1 : 0)
            : found);
        distUDAFParamsIndxInDistExprs.add(distParamInRefsToOutputPos.get(argLst.get(i)));
      } else {
        // TODO: this seems wrong (following what Hive Regular does)
        if (!distParamInRefsToOutputPos.containsKey(argLst.get(i))
            && !deDupedNonDistIrefsSet.contains(argLst.get(i))) {
          deDupedNonDistIrefsSet.add(argLst.get(i));
          gbInfo.deDupedNonDistIrefs.add(udafAttrs.udafParams.get(i));
        }
      }
    }
    if (udafAttrs.isDistinctUDAF) {
      gbInfo.containsDistinctAggr = true;
      udafAttrs.udafParamsIndxInGBInfoDistExprs = distUDAFParamsIndxInDistExprs;
      gbInfo.distColIndices.add(distColIndicesOfUDAF);
    }
    // Special handling for count, similar to PlanModifierForASTConv::replaceEmptyGroupAggr()
    udafAttrs.udafEvaluator = SemanticAnalyzer.getGenericUDAFEvaluator(udafAttrs.udafName,
        new ArrayList<ExprNodeDesc>(udafAttrs.udafParams), new ASTNode(), udafAttrs.isDistinctUDAF,
        udafAttrs.udafParams.isEmpty() && "count".equalsIgnoreCase(udafAttrs.udafName));
    gbInfo.udafAttrs.add(udafAttrs);
  }
  // 5. Gather GB Memory threshold
  gbInfo.groupByMemoryUsage = HiveConf.getFloatVar(hc, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
  gbInfo.memoryThreshold = HiveConf.getFloatVar(hc, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD);
  // 6. Gather GB Physical pipeline (based on user config & Grouping Sets size)
  gbInfo.gbPhysicalPipelineMode = getAggOPMode(hc, gbInfo);
  return gbInfo;
}
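A hedged worked example of the distinct column indexing above: for SELECT count(DISTINCT b) FROM t GROUP BY a there is one GB key, no grouping sets, and b is not part of the key, so b lands at position 0 of distExprNodes and its distColIndicesOfUDAF entry is 0 + 1 + 0 = 1. In other words, distinct parameters are addressed after the GB keys, plus one extra slot for the grouping ID when grouping sets are present.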
Use of org.apache.hadoop.hive.ql.parse.ASTNode in project hive by apache.
The class CreateTableHook, method preAnalyze().
@Override
public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast) throws SemanticException {
  Hive db;
  try {
    db = context.getHive();
  } catch (HiveException e) {
    throw new SemanticException("Couldn't get Hive DB instance in semantic analysis phase.", e);
  }
  // Analyze and create tbl properties object
  int numCh = ast.getChildCount();
  tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast.getChild(0));
  boolean likeTable = false;
  StorageFormat format = new StorageFormat(context.getConf());
  for (int num = 1; num < numCh; num++) {
    ASTNode child = (ASTNode) ast.getChild(num);
    if (format.fillStorageFormat(child)) {
      if (org.apache.commons.lang.StringUtils.isNotEmpty(format.getStorageHandler())) {
        return ast;
      }
      continue;
    }
    switch (child.getToken().getType()) {
    case HiveParser.TOK_QUERY:
      // CTAS
      throw new SemanticException("Operation not supported. Create table as "
          + "Select is not a valid operation.");
    case HiveParser.TOK_ALTERTABLE_BUCKETS:
      break;
    case HiveParser.TOK_LIKETABLE:
      likeTable = true;
      break;
    case HiveParser.TOK_IFNOTEXISTS:
      try {
        List<String> tables = db.getTablesByPattern(tableName);
        if (tables != null && tables.size() > 0) {
          // table exists
          return ast;
        }
      } catch (HiveException e) {
        throw new SemanticException(e);
      }
      break;
    case HiveParser.TOK_TABLEPARTCOLS:
      List<FieldSchema> partCols = BaseSemanticAnalyzer.getColumns(child, false);
      for (FieldSchema fs : partCols) {
        if (!fs.getType().equalsIgnoreCase("string")) {
          throw new SemanticException("Operation not supported. HCatalog only "
              + "supports partition columns of type string. "
              + "For column: " + fs.getName() + " Found type: " + fs.getType());
        }
      }
      break;
    }
  }
  if (!likeTable && (format.getInputFormat() == null || format.getOutputFormat() == null)) {
    throw new SemanticException("STORED AS specification is either incomplete or incorrect.");
  }
  return ast;
}
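For instance (illustrative): CREATE TABLE t2 AS SELECT * FROM t1 reaches the hook with a HiveParser.TOK_QUERY child and is rejected, and CREATE TABLE t (x int) PARTITIONED BY (dt date) STORED AS ORC trips the TOK_TABLEPARTCOLS check because the partition column is not of type string, while the same statement with PARTITIONED BY (dt string) passes through and is returned unchanged.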