Use of org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException in project hive by apache.
The class HiveExceptRewriteRule, method onMatch.
// ~ Methods ----------------------------------------------------------------
public void onMatch(RelOptRuleCall call) {
  final HiveExcept hiveExcept = call.rel(0);
  final RelOptCluster cluster = hiveExcept.getCluster();
  final RexBuilder rexBuilder = cluster.getRexBuilder();
  Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
  // 1st level GB: create a GB (all keys + VCol + count(1) as c) for each branch
  try {
    bldr.add(createFirstGB(hiveExcept.getInputs().get(0), true, cluster, rexBuilder));
    bldr.add(createFirstGB(hiveExcept.getInputs().get(1), false, cluster, rexBuilder));
  } catch (CalciteSemanticException e) {
    LOG.debug(e.toString());
    throw new RuntimeException(e);
  }
  // create a union above all the branches;
  // the schema of the union looks like this:
  // all keys + VCol + c
  HiveRelNode union = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build());
  // 2nd level GB: create a GB (all keys + sum(c) as a + sum(VCol*c) as b) for
  // each branch
  final List<RexNode> gbChildProjLst = Lists.newArrayList();
  final List<Integer> groupSetPositions = Lists.newArrayList();
  int unionColumnSize = union.getRowType().getFieldList().size();
  for (int cInd = 0; cInd < unionColumnSize; cInd++) {
    gbChildProjLst.add(rexBuilder.makeInputRef(union, cInd));
    // the last 2 columns are VCol and c
    if (cInd < unionColumnSize - 2) {
      groupSetPositions.add(cInd);
    }
  }
  try {
    gbChildProjLst.add(multiply(rexBuilder.makeInputRef(union, unionColumnSize - 2),
        rexBuilder.makeInputRef(union, unionColumnSize - 1), cluster, rexBuilder));
  } catch (CalciteSemanticException e) {
    LOG.debug(e.toString());
    throw new RuntimeException(e);
  }
  RelNode gbInputRel = null;
  try {
    // Here we create a project for the following reasons:
    // (1) GBy only accepts its arguments as input positions, but we need to sum on VCol*c
    // (2) it lets us reuse the function createSingleArgAggCall
    gbInputRel = HiveProject.create(union, gbChildProjLst, null);
  } catch (CalciteSemanticException e) {
    LOG.debug(e.toString());
    throw new RuntimeException(e);
  }
  // gbInputRel's schema looks like this:
  // all keys + VCol + c + VCol*c
  List<AggregateCall> aggregateCalls = Lists.newArrayList();
  RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo,
      cluster.getTypeFactory());
  // sum(c)
  AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("sum", cluster,
      TypeInfoFactory.longTypeInfo, unionColumnSize - 1, aggFnRetType);
  aggregateCalls.add(aggregateCall);
  // sum(VCol*c)
  aggregateCall = HiveCalciteUtil.createSingleArgAggCall("sum", cluster,
      TypeInfoFactory.longTypeInfo, unionColumnSize, aggFnRetType);
  aggregateCalls.add(aggregateCall);
  final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
  HiveRelNode aggregateRel = new HiveAggregate(cluster,
      cluster.traitSetOf(HiveRelNode.CONVENTION), gbInputRel, groupSet, null, aggregateCalls);
  if (!hiveExcept.all) {
    RelNode filterRel = null;
    try {
      filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
          aggregateRel,
          makeFilterExprForExceptDistinct(aggregateRel, unionColumnSize, cluster, rexBuilder));
    } catch (CalciteSemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
    // finally add a project to project out the last 2 columns
    Set<Integer> projectOutColumnPositions = new HashSet<>();
    projectOutColumnPositions.add(filterRel.getRowType().getFieldList().size() - 2);
    projectOutColumnPositions.add(filterRel.getRowType().getFieldList().size() - 1);
    try {
      call.transformTo(
          HiveCalciteUtil.createProjectWithoutColumn(filterRel, projectOutColumnPositions));
    } catch (CalciteSemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
  } else {
    List<RexNode> originalInputRefs = Lists.transform(aggregateRel.getRowType().getFieldList(),
        new Function<RelDataTypeField, RexNode>() {
          @Override
          public RexNode apply(RelDataTypeField input) {
            return new RexInputRef(input.getIndex(), input.getType());
          }
        });
    List<RexNode> copyInputRefs = new ArrayList<>();
    try {
      copyInputRefs.add(makeExprForExceptAll(aggregateRel, unionColumnSize, cluster, rexBuilder));
    } catch (CalciteSemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
    for (int i = 0; i < originalInputRefs.size() - 2; i++) {
      copyInputRefs.add(originalInputRefs.get(i));
    }
    RelNode srcRel = null;
    try {
      srcRel = HiveProject.create(aggregateRel, copyInputRefs, null);
      HiveTableFunctionScan udtf = HiveCalciteUtil.createUDTFForSetOp(cluster, srcRel);
      // finally add a project to project out the 1st column
      Set<Integer> projectOutColumnPositions = new HashSet<>();
      projectOutColumnPositions.add(0);
      call.transformTo(
          HiveCalciteUtil.createProjectWithoutColumn(udtf, projectOutColumnPositions));
    } catch (SemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
  }
}
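The arithmetic behind this rewrite is worth spelling out. In Hive's implementation, createFirstGB (not shown here) tags each key group from the left branch with the constant VCol = 2 and each group from the right branch with VCol = 1. After the second-level aggregate, a = sum(c) = m + n and b = sum(VCol*c) = 2m + n, where m and n are the per-key row counts in the left and right inputs; solving gives m = b - a and n = 2a - b. EXCEPT DISTINCT therefore keeps exactly the groups with b - a > 0 and 2a - b = 0. Below is a minimal sketch of how such a predicate could be assembled; it is not the Hive code (which routes operator lookup through SqlFunctionConverter) but uses Calcite's standard operator table, and the method name is hypothetical.

import java.math.BigDecimal;

import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;

// Hypothetical sketch: build (b - a > 0) AND (2a - b = 0) over the
// aggregate's last two columns.
static RexNode exceptDistinctFilterSketch(RelNode aggregateRel, int unionColumnSize,
    RexBuilder rexBuilder) {
  // After the 2nd-level GB the schema is: all keys + a (sum(c)) + b (sum(VCol*c)).
  RexNode a = rexBuilder.makeInputRef(aggregateRel, unionColumnSize - 2);
  RexNode b = rexBuilder.makeInputRef(aggregateRel, unionColumnSize - 1);
  RexNode zero = rexBuilder.makeBigintLiteral(BigDecimal.ZERO);
  RexNode two = rexBuilder.makeBigintLiteral(new BigDecimal(2));
  // m = b - a > 0: the group occurs at least once in the left input.
  RexNode inLeft = rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN,
      rexBuilder.makeCall(SqlStdOperatorTable.MINUS, b, a), zero);
  // n = 2a - b = 0: the group never occurs in the right input.
  RexNode notInRight = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
      rexBuilder.makeCall(SqlStdOperatorTable.MINUS,
          rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, two, a), b),
      zero);
  return rexBuilder.makeCall(SqlStdOperatorTable.AND, inLeft, notInRight);
}

The EXCEPT ALL branch reuses the same counts: instead of filtering, the UDTF explodes each surviving group max(b - a - (2a - b), 0)-many times, which is why the project in that branch keeps the computed expression plus all the keys.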
Use of org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException in project hive by apache.
The class HiveExpandDistinctAggregatesRule, method onMatch.
// ~ Methods ----------------------------------------------------------------
@Override
public void onMatch(RelOptRuleCall call) {
  final Aggregate aggregate = call.rel(0);
  int numCountDistinct = getNumCountDistinctCall(aggregate);
  if (numCountDistinct == 0) {
    return;
  }
  // Find all of the agg expressions. We use a List (for all count(distinct))
  // as well as a Set (for all others) to ensure determinism.
  int nonDistinctCount = 0;
  List<List<Integer>> argListList = new ArrayList<List<Integer>>();
  Set<List<Integer>> argListSets = new LinkedHashSet<List<Integer>>();
  Set<Integer> positions = new HashSet<>();
  for (AggregateCall aggCall : aggregate.getAggCallList()) {
    if (!aggCall.isDistinct()) {
      ++nonDistinctCount;
      continue;
    }
    ArrayList<Integer> argList = new ArrayList<Integer>();
    for (Integer arg : aggCall.getArgList()) {
      argList.add(arg);
      positions.add(arg);
    }
    // Aggr checks for sorted argList.
    argListList.add(argList);
    argListSets.add(argList);
  }
  Util.permAssert(argListSets.size() > 0, "containsDistinctCall lied");
  if (numCountDistinct > 1 && numCountDistinct == aggregate.getAggCallList().size()
      && aggregate.getGroupSet().isEmpty()) {
    LOG.debug("Trigger countDistinct rewrite. numCountDistinct is " + numCountDistinct);
    // now positions contains all the distinct positions, e.g., $5, $4, $6;
    // we need to sort them first to form the group by set,
    // and then look up their positions later, e.g., $4->1, $5->2, $6->3
    cluster = aggregate.getCluster();
    rexBuilder = cluster.getRexBuilder();
    RelNode converted = null;
    List<Integer> sourceOfForCountDistinct = new ArrayList<>();
    sourceOfForCountDistinct.addAll(positions);
    Collections.sort(sourceOfForCountDistinct);
    try {
      converted = convert(aggregate, argListList, sourceOfForCountDistinct);
    } catch (CalciteSemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
    call.transformTo(converted);
    return;
  }
  // If all of the agg expressions are distinct and have the same
  // arguments then we can use a more efficient form.
  final RelMetadataQuery mq = call.getMetadataQuery();
  if ((nonDistinctCount == 0) && (argListSets.size() == 1)) {
    for (Integer arg : argListSets.iterator().next()) {
      Set<RelColumnOrigin> colOrigs = mq.getColumnOrigins(aggregate, arg);
      if (null != colOrigs) {
        for (RelColumnOrigin colOrig : colOrigs) {
          RelOptHiveTable hiveTbl = (RelOptHiveTable) colOrig.getOriginTable();
          if (hiveTbl.getPartColInfoMap().containsKey(colOrig.getOriginColumnOrdinal())) {
            // Encountered a partitioning column; this is better handled by the
            // MetadataOnly optimizer.
            return;
          }
        }
      }
    }
    RelNode converted = convertMonopole(aggregate, argListSets.iterator().next());
    call.transformTo(converted);
    return;
  }
}
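The position-sorting step deserves a concrete illustration. If the distinct aggregates reference $5, $4, and $6, the rule sorts those input ordinals so they can serve as a deterministic group-by key list, then refers to each column by its new 1-based position; the comment's $4->1 mapping suggests ordinal 0 is reserved for something else in the rewritten plan, presumably the grouping ID. A small self-contained sketch of that mapping (the method name is hypothetical):

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

// Hypothetical sketch: map each distinct-aggregate argument position to its
// 1-based ordinal among the sorted group-by keys, e.g. {5, 4, 6} -> {4=1, 5=2, 6=3}.
static Map<Integer, Integer> groupByOrdinals(Set<Integer> positions) {
  List<Integer> sorted = new ArrayList<>(positions);
  Collections.sort(sorted);
  Map<Integer, Integer> ordinals = new LinkedHashMap<>();
  for (int i = 0; i < sorted.size(); i++) {
    ordinals.put(sorted.get(i), i + 1);
  }
  return ordinals;
}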
Use of org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException in project hive by apache.
The class SemanticAnalyzer, method genColListRegex.
@SuppressWarnings("nls")
// TODO: make aliases unique, otherwise needless rewriting takes place
Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel,
    ArrayList<ExprNodeDesc> col_list, HashSet<ColumnInfo> excludeCols, RowResolver input,
    RowResolver colSrcRR, Integer pos, RowResolver output, List<String> aliases,
    boolean ensureUniqueCols) throws SemanticException {
  if (colSrcRR == null) {
    colSrcRR = input;
  }
  // The table alias should exist
  if (tabAlias != null && !colSrcRR.hasTableAlias(tabAlias)) {
    throw new SemanticException(ErrorMsg.INVALID_TABLE_ALIAS.getMsg(sel));
  }
  // TODO: Have to put in the support for AS clause
  Pattern regex = null;
  try {
    regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE);
  } catch (PatternSyntaxException e) {
    throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel, e.getMessage()));
  }
  StringBuilder replacementText = new StringBuilder();
  int matched = 0;
  // Unqualified columns are registered under the empty table alias "", so make
  // sure its ColumnInfos are searched as well.
  if (!aliases.contains("")) {
    aliases.add("");
  }
  /*
   * track the input ColumnInfos that are added to the output.
   * if a columnInfo has multiple mappings, then add the column only once,
   * but carry the mappings forward.
   */
  Map<ColumnInfo, ColumnInfo> inputColsProcessed = new HashMap<ColumnInfo, ColumnInfo>();
  if (colSrcRR.getNamedJoinInfo() != null) {
    // We got a using() clause in the previous join. We need to generate the
    // select list per the standard: for * we will have the joining columns
    // first, non-repeated, followed by the other columns.
    HashMap<String, ColumnInfo> leftMap =
        colSrcRR.getFieldMap(colSrcRR.getNamedJoinInfo().getAliases().get(0));
    HashMap<String, ColumnInfo> rightMap =
        colSrcRR.getFieldMap(colSrcRR.getNamedJoinInfo().getAliases().get(1));
    HashMap<String, ColumnInfo> chosenMap = null;
    if (colSrcRR.getNamedJoinInfo().getHiveJoinType() != JoinType.RIGHTOUTER) {
      chosenMap = leftMap;
    } else {
      chosenMap = rightMap;
    }
    // first get the columns in named columns
    for (String columnName : colSrcRR.getNamedJoinInfo().getNamedColumns()) {
      for (Map.Entry<String, ColumnInfo> entry : chosenMap.entrySet()) {
        ColumnInfo colInfo = entry.getValue();
        if (!columnName.equals(colInfo.getAlias())) {
          continue;
        }
        String name = colInfo.getInternalName();
        String[] tmp = colSrcRR.reverseLookup(name);
        // Skip the colInfos which are not for this particular alias
        if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
          continue;
        }
        if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
          continue;
        }
        ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
        if (oColInfo == null) {
          ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(), name,
              colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol());
          col_list.add(expr);
          oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
              colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
          inputColsProcessed.put(colInfo, oColInfo);
        }
        if (ensureUniqueCols) {
          if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) {
            throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "."
                + tmp[1] + " => " + oColInfo + " due to duplication, see previous warnings",
                UnsupportedFeature.Duplicates_in_RR);
          }
        } else {
          output.put(tmp[0], tmp[1], oColInfo);
        }
        pos = Integer.valueOf(pos.intValue() + 1);
        matched++;
        if (unparseTranslator.isEnabled() || (tableMask.isEnabled() && analyzeRewrite == null)) {
          if (replacementText.length() > 0) {
            replacementText.append(", ");
          }
          replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
          replacementText.append(".");
          replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
        }
      }
    }
  }
  for (String alias : aliases) {
    HashMap<String, ColumnInfo> fMap = colSrcRR.getFieldMap(alias);
    if (fMap == null) {
      continue;
    }
    // add all the columns that match the regex to the field list,
    // from the input schema
    for (Map.Entry<String, ColumnInfo> entry : fMap.entrySet()) {
      ColumnInfo colInfo = entry.getValue();
      if (colSrcRR.getNamedJoinInfo() != null
          && colSrcRR.getNamedJoinInfo().getNamedColumns().contains(colInfo.getAlias())) {
        // we already added this column in the select list.
        continue;
      }
      if (excludeCols != null && excludeCols.contains(colInfo)) {
        // This was added during plan generation.
        continue;
      }
      // First, look up the column from the source against which * is to be
      // resolved. We'd later translate this into the column from the proper
      // input, if it's valid.
      // TODO: it may be possible to remove excludeCols using the same technique.
      String name = colInfo.getInternalName();
      String[] tmp = colSrcRR.reverseLookup(name);
      // Skip the colInfos which are not for this particular alias
      if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
        continue;
      }
      if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
        continue;
      }
      // Not matching the regex?
      if (!regex.matcher(tmp[1]).matches()) {
        continue;
      }
      // TODO: This is fraught with peril.
      if (input != colSrcRR) {
        colInfo = input.get(tabAlias, tmp[1]);
        if (colInfo == null) {
          LOG.error("Cannot find colInfo for " + tabAlias + "." + tmp[1] + ", derived from ["
              + colSrcRR + "], in [" + input + "]");
          throw new SemanticException(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY, tmp[1]);
        }
        String oldCol = null;
        if (LOG.isDebugEnabled()) {
          oldCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." + tmp[1]));
        }
        name = colInfo.getInternalName();
        tmp = input.reverseLookup(name);
        if (LOG.isDebugEnabled()) {
          String newCol = name + " => " + (tmp == null ? "null" : (tmp[0] + "." + tmp[1]));
          LOG.debug("Translated [" + oldCol + "] to [" + newCol + "]");
        }
      }
      ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
      if (oColInfo == null) {
        ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(), name,
            colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol());
        col_list.add(expr);
        oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
            colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
        inputColsProcessed.put(colInfo, oColInfo);
      }
      if (ensureUniqueCols) {
        if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) {
          throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "." + tmp[1]
              + " => " + oColInfo + " due to duplication, see previous warnings",
              UnsupportedFeature.Duplicates_in_RR);
        }
      } else {
        output.put(tmp[0], tmp[1], oColInfo);
      }
      pos = Integer.valueOf(pos.intValue() + 1);
      matched++;
      if (unparseTranslator.isEnabled() || tableMask.isEnabled()) {
        if (replacementText.length() > 0) {
          replacementText.append(", ");
        }
        replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
        replacementText.append(".");
        replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
      }
    }
  }
  if (matched == 0) {
    throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel));
  }
  if (unparseTranslator.isEnabled()) {
    unparseTranslator.addTranslation(sel, replacementText.toString());
  } else if (tableMask.isEnabled()) {
    tableMask.addTranslation(sel, replacementText.toString());
  }
  return pos;
}
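The regex handling is the easiest part of this method to isolate: the column pattern is compiled with Pattern.CASE_INSENSITIVE, a malformed pattern or one that matches no column is rejected as INVALID_COLUMN, and matches() (rather than find()) requires the whole resolved column name to match. A stripped-down sketch of that matching loop, using only the JDK; the method name and the IllegalArgumentException stand-in for Hive's SemanticException are hypothetical:

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

// Hypothetical sketch of the column-regex expansion genColListRegex performs.
static List<String> expandColumns(String colRegex, List<String> columns) {
  final Pattern regex;
  try {
    regex = Pattern.compile(colRegex, Pattern.CASE_INSENSITIVE);
  } catch (PatternSyntaxException e) {
    throw new IllegalArgumentException("Invalid column regex: " + e.getMessage(), e);
  }
  List<String> matched = new ArrayList<>();
  for (String col : columns) {
    if (regex.matcher(col).matches()) { // matches(), not find(): the whole name must match
      matched.add(col);
    }
  }
  if (matched.isEmpty()) {
    throw new IllegalArgumentException("Invalid column: nothing matches " + colRegex);
  }
  return matched;
}

With these semantics, expandColumns("key.*", Arrays.asList("key", "KEY2", "value")) returns [key, KEY2]: CASE_INSENSITIVE lets KEY2 match, while value is excluded because the entire name must satisfy the pattern.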
Use of org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException in project hive by apache.
The class SqlFunctionConverter, method getCalciteFn.
public static SqlOperator getCalciteFn(String hiveUdfName,
    ImmutableList<RelDataType> calciteArgTypes, RelDataType calciteRetType,
    boolean deterministic, boolean dynamicFunction) throws CalciteSemanticException {
  if (hiveUdfName != null && hiveUdfName.trim().equals("<=>")) {
    // CBO cannot handle the null-safe equality operator yet. So, bail out for now.
    throw new CalciteSemanticException("<=> is not yet supported for cbo.",
        UnsupportedFeature.Less_than_equal_greater_than);
  }
  SqlOperator calciteOp;
  CalciteUDFInfo uInf = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType);
  switch (hiveUdfName) {
    // TODO: Perhaps we should do this for all functions, not just +,-
    case "-":
      calciteOp = new SqlMonotonicBinaryOperator("-", SqlKind.MINUS, 40, true,
          uInf.returnTypeInference, uInf.operandTypeInference, OperandTypes.MINUS_OPERATOR);
      break;
    case "+":
      calciteOp = new SqlMonotonicBinaryOperator("+", SqlKind.PLUS, 40, true,
          uInf.returnTypeInference, uInf.operandTypeInference, OperandTypes.PLUS_OPERATOR);
      break;
    default:
      calciteOp = hiveToCalcite.get(hiveUdfName);
      if (null == calciteOp) {
        calciteOp = new CalciteSqlFn(uInf.udfName, SqlKind.OTHER_FUNCTION,
            uInf.returnTypeInference, uInf.operandTypeInference, uInf.operandTypeChecker,
            SqlFunctionCategory.USER_DEFINED_FUNCTION, deterministic, dynamicFunction);
      }
      break;
  }
  return calciteOp;
}
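A hedged usage sketch follows. It assumes Calcite's JavaTypeFactoryImpl is a reasonable stand-in for the planner's type factory (the real callers pass types converted from Hive TypeInfos); it resolves "+" to the monotonic binary operator created above and shows where the CalciteSemanticException for "<=>" would surface:

import com.google.common.collect.ImmutableList;

import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;

// Hypothetical usage sketch for SqlFunctionConverter.getCalciteFn.
static void resolvePlusExample() {
  RelDataType bigint = new JavaTypeFactoryImpl().createSqlType(SqlTypeName.BIGINT);
  ImmutableList<RelDataType> argTypes = ImmutableList.of(bigint, bigint);
  try {
    // "+" takes the SqlMonotonicBinaryOperator branch above (SqlKind.PLUS).
    SqlOperator plus = SqlFunctionConverter.getCalciteFn("+", argTypes, bigint, true, false);
  } catch (CalciteSemanticException e) {
    // Only "<=>" (null-safe equality) is rejected here, flagged as
    // UnsupportedFeature.Less_than_equal_greater_than.
  }
}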