Use of org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException in project hive by apache.
Class PlanModifierUtil, method fixTopOBSchema.
protected static void fixTopOBSchema(final RelNode rootRel, Pair<RelNode, RelNode> topSelparentPair,
    List<FieldSchema> resultSchema, boolean replaceProject) throws CalciteSemanticException {
  if (!(topSelparentPair.getKey() instanceof Sort) || !HiveCalciteUtil.orderRelNode(topSelparentPair.getKey())) {
    return;
  }
  HiveSortLimit obRel = (HiveSortLimit) topSelparentPair.getKey();
  Project obChild = (Project) topSelparentPair.getValue();
  if (obChild.getRowType().getFieldCount() <= resultSchema.size()) {
    return;
  }
  RelDataType rt = obChild.getRowType();
  @SuppressWarnings({ "unchecked", "rawtypes" })
  Set<Integer> collationInputRefs = new HashSet(RelCollations.ordinals(obRel.getCollation()));
  ImmutableMap.Builder<Integer, RexNode> inputRefToCallMapBldr = ImmutableMap.builder();
  for (int i = resultSchema.size(); i < rt.getFieldCount(); i++) {
    if (collationInputRefs.contains(i)) {
      RexNode obyExpr = obChild.getChildExps().get(i);
      if (obyExpr instanceof RexCall) {
        LOG.debug("Old RexCall : " + obyExpr);
        obyExpr = adjustOBSchema((RexCall) obyExpr, obChild, resultSchema);
        LOG.debug("New RexCall : " + obyExpr);
      }
      inputRefToCallMapBldr.put(i, obyExpr);
    }
  }
  ImmutableMap<Integer, RexNode> inputRefToCallMap = inputRefToCallMapBldr.build();
  if ((obChild.getRowType().getFieldCount() - inputRefToCallMap.size()) != resultSchema.size()) {
    LOG.error(generateInvalidSchemaMessage(obChild, resultSchema, inputRefToCallMap.size()));
    throw new CalciteSemanticException("Result Schema didn't match Optimized Op Tree Schema");
  }
  if (replaceProject) {
    // This removes order-by only expressions from the projections.
    HiveProject replacementProjectRel = HiveProject.create(obChild.getInput(),
        obChild.getChildExps().subList(0, resultSchema.size()),
        obChild.getRowType().getFieldNames().subList(0, resultSchema.size()));
    obRel.replaceInput(0, replacementProjectRel);
  }
  obRel.setInputRefToCallMap(inputRefToCallMap);
}
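The interesting part of the method above is the subList trimming: projection columns past resultSchema.size() exist only to feed the sort collation, so once the sort keeps them in its inputRefToCallMap the project can drop them. Below is a minimal standalone sketch of just that trimming step, using toy String expressions in place of RexNodes (the class and names are illustrative, not from Hive):

import java.util.Arrays;
import java.util.List;

public class OrderByTrimSketch {
  public static void main(String[] args) {
    // The first two expressions are in the result schema; the third was
    // projected only so that ORDER BY could reference it.
    List<String> projections = Arrays.asList("col0", "col1", "upper(col0)");
    int resultSchemaSize = 2;
    // Mirrors obChild.getChildExps().subList(0, resultSchema.size()):
    // keep only the columns the final schema exposes.
    List<String> trimmed = projections.subList(0, resultSchemaSize);
    System.out.println(trimmed); // prints [col0, col1]
  }
}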
Use of org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException in project hive by apache.
Class RexNodeConverter, method convert.
protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticException {
  RexBuilder rexBuilder = cluster.getRexBuilder();
  RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
  PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo) literal.getTypeInfo();
  RelDataType calciteDataType = TypeConverter.convert(hiveType, dtFactory);
  PrimitiveCategory hiveTypeCategory = hiveType.getPrimitiveCategory();
  ConstantObjectInspector coi = literal.getWritableObjectInspector();
  Object value = ObjectInspectorUtils.copyToStandardJavaObject(coi.getWritableConstantValue(), coi);
  RexNode calciteLiteral = null;
  // If value is null, the type should also be VOID.
  if (value == null) {
    hiveTypeCategory = PrimitiveCategory.VOID;
  }
  // TODO: Verify if we need to use ConstantObjectInspector to unwrap data
  switch (hiveTypeCategory) {
    case BOOLEAN:
      calciteLiteral = rexBuilder.makeLiteral(((Boolean) value).booleanValue());
      break;
    case BYTE:
      calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Byte) value), calciteDataType);
      break;
    case SHORT:
      calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Short) value), calciteDataType);
      break;
    case INT:
      calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Integer) value));
      break;
    case LONG:
      calciteLiteral = rexBuilder.makeBigintLiteral(new BigDecimal((Long) value));
      break;
    // TODO: is Decimal an exact numeric or approximate numeric?
    case DECIMAL:
      if (value instanceof HiveDecimal) {
        value = ((HiveDecimal) value).bigDecimalValue();
      } else if (value instanceof Decimal128) {
        value = ((Decimal128) value).toBigDecimal();
      }
      if (value == null) {
        // The decimal value could not be converted; reject the expression
        // rather than substituting a null literal.
        throw new CalciteSemanticException("Expression " + literal.getExprString() + " is not a valid decimal",
            UnsupportedFeature.Invalid_decimal);
        // TODO: return createNullLiteral(literal);
      }
      BigDecimal bd = (BigDecimal) value;
      BigInteger unscaled = bd.unscaledValue();
      if (unscaled.compareTo(MIN_LONG_BI) >= 0 && unscaled.compareTo(MAX_LONG_BI) <= 0) {
        calciteLiteral = rexBuilder.makeExactLiteral(bd);
      } else {
        // CBO doesn't support unlimited precision decimals. In practice, this
        // will work...
        // An alternative would be to throw CboSemanticException and fall back
        // to no CBO.
        RelDataType relType = cluster.getTypeFactory().createSqlType(SqlTypeName.DECIMAL,
            unscaled.toString().length(), bd.scale());
        calciteLiteral = rexBuilder.makeExactLiteral(bd, relType);
      }
      break;
    case FLOAT:
      calciteLiteral = rexBuilder.makeApproxLiteral(new BigDecimal(Float.toString((Float) value)), calciteDataType);
      break;
    case DOUBLE:
      // TODO: The best solution is to support NaN in expression reduction.
      if (Double.isNaN((Double) value)) {
        throw new CalciteSemanticException("NaN", UnsupportedFeature.Invalid_decimal);
      }
      calciteLiteral = rexBuilder.makeApproxLiteral(new BigDecimal(Double.toString((Double) value)), calciteDataType);
      break;
    case CHAR:
      if (value instanceof HiveChar) {
        value = ((HiveChar) value).getValue();
      }
      calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
      break;
    case VARCHAR:
      if (value instanceof HiveVarchar) {
        value = ((HiveVarchar) value).getValue();
      }
      calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
      break;
    case STRING:
      calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
      break;
    case DATE:
      Calendar cal = new GregorianCalendar();
      cal.setTime((Date) value);
      calciteLiteral = rexBuilder.makeDateLiteral(cal);
      break;
    case TIMESTAMP:
      Calendar c = null;
      if (value instanceof Calendar) {
        c = (Calendar) value;
      } else {
        c = Calendar.getInstance();
        c.setTimeInMillis(((Timestamp) value).getTime());
      }
      calciteLiteral = rexBuilder.makeTimestampLiteral(c, RelDataType.PRECISION_NOT_SPECIFIED);
      break;
    case INTERVAL_YEAR_MONTH:
      // Calcite year-month literal value is months as BigDecimal
      BigDecimal totalMonths = BigDecimal.valueOf(((HiveIntervalYearMonth) value).getTotalMonths());
      calciteLiteral = rexBuilder.makeIntervalLiteral(totalMonths,
          new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1)));
      break;
    case INTERVAL_DAY_TIME:
      // Calcite day-time interval is millis value as BigDecimal
      // Seconds converted to millis
      BigDecimal secsValueBd = BigDecimal.valueOf(((HiveIntervalDayTime) value).getTotalSeconds() * 1000);
      // Nanos converted to millis
      BigDecimal nanosValueBd = BigDecimal.valueOf(((HiveIntervalDayTime) value).getNanos(), 6);
      calciteLiteral = rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd),
          new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new SqlParserPos(1, 1)));
      break;
    case VOID:
      calciteLiteral = cluster.getRexBuilder().makeLiteral(null,
          cluster.getTypeFactory().createSqlType(SqlTypeName.NULL), true);
      break;
    case BINARY:
    case UNKNOWN:
    default:
      throw new RuntimeException("UnSupported Literal");
  }
  return calciteLiteral;
}
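One detail worth isolating from the DECIMAL branch is the range check that picks between the two makeExactLiteral overloads. A self-contained sketch follows; MIN_LONG_BI and MAX_LONG_BI are not shown in the excerpt, so their definitions here (the long range as BigIntegers) are an assumption:

import java.math.BigDecimal;
import java.math.BigInteger;

public class DecimalRangeSketch {
  // Assumed definitions; the excerpt above only references these constants.
  private static final BigInteger MIN_LONG_BI = BigInteger.valueOf(Long.MIN_VALUE);
  private static final BigInteger MAX_LONG_BI = BigInteger.valueOf(Long.MAX_VALUE);

  /** True if the decimal's unscaled value fits in a signed 64-bit long. */
  static boolean fitsInLong(BigDecimal bd) {
    BigInteger unscaled = bd.unscaledValue();
    return unscaled.compareTo(MIN_LONG_BI) >= 0 && unscaled.compareTo(MAX_LONG_BI) <= 0;
  }

  public static void main(String[] args) {
    System.out.println(fitsInLong(new BigDecimal("123.45")));               // true
    System.out.println(fitsInLong(new BigDecimal("92233720368547758080"))); // false: needs a widened DECIMAL type
  }
}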
Use of org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException in project hive by apache.
Class HiveRelMdSelectivity, method computeInnerJoinSelectivity.
private Double computeInnerJoinSelectivity(Join j, RelMetadataQuery mq, RexNode predicate) {
  Pair<Boolean, RexNode> predInfo = getCombinedPredicateForJoin(j, predicate);
  if (!predInfo.getKey()) {
    return new FilterSelectivityEstimator(j).estimateSelectivity(predInfo.getValue());
  }
  RexNode combinedPredicate = predInfo.getValue();
  JoinPredicateInfo jpi;
  try {
    jpi = JoinPredicateInfo.constructJoinPredicateInfo(j, combinedPredicate);
  } catch (CalciteSemanticException e) {
    throw new RuntimeException(e);
  }
  ImmutableMap.Builder<Integer, Double> colStatMapBuilder = ImmutableMap.builder();
  ImmutableMap<Integer, Double> colStatMap;
  int rightOffSet = j.getLeft().getRowType().getFieldCount();
  // 1. Collect NDVs for columns from the left side of the join which are part of join keys
  for (Integer ljk : jpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) {
    colStatMapBuilder.put(ljk, HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), mq, ljk));
  }
  // 2. Collect NDVs for columns from the right side of the join which are part of join keys
  for (Integer rjk : jpi.getProjsFromRightPartOfJoinKeysInChildSchema()) {
    colStatMapBuilder.put(rjk + rightOffSet, HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), mq, rjk));
  }
  colStatMap = colStatMapBuilder.build();
  // 3. Walk through the join condition building NDV for selectivity.
  // NDV of the join can not exceed the cardinality of the cross join.
  List<JoinLeafPredicateInfo> peLst = jpi.getEquiJoinPredicateElements();
  int noOfPE = peLst.size();
  double ndvCrossProduct = 1;
  if (noOfPE > 0) {
    ndvCrossProduct = exponentialBackoff(peLst, colStatMap);
    if (j instanceof SemiJoin) {
      ndvCrossProduct = Math.min(mq.getRowCount(j.getLeft()), ndvCrossProduct);
    } else if (j instanceof HiveJoin) {
      ndvCrossProduct = Math.min(mq.getRowCount(j.getLeft()) * mq.getRowCount(j.getRight()), ndvCrossProduct);
    } else {
      throw new RuntimeException("Unexpected Join type: " + j.getClass().getName());
    }
  }
  // 4. Join Selectivity = 1/NDV
  return (1 / ndvCrossProduct);
}
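exponentialBackoff is not shown in this excerpt. A common way to combine per-key NDVs, and a plausible reading of what the helper does, is to damp each additional key's contribution: ndv0 * ndv1^(1/2) * ndv2^(1/4) and so on, largest first. The sketch below is a hypothetical stand-in under that assumption, just to make the 1/NDV arithmetic concrete:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class JoinSelectivitySketch {
  /** Hypothetical stand-in for exponentialBackoff(): damped product of NDVs. */
  static double exponentialBackoff(List<Double> ndvs) {
    List<Double> sorted = new ArrayList<>(ndvs);
    sorted.sort(Collections.reverseOrder());
    double result = 1.0;
    double exponent = 1.0;
    for (double ndv : sorted) {
      result *= Math.pow(ndv, exponent);
      exponent /= 2; // each extra join key counts with diminishing weight
    }
    return result;
  }

  public static void main(String[] args) {
    // Two equi-join keys with NDVs 1000 and 50: 1000 * 50^0.5 ~= 7071.
    double ndvCrossProduct = exponentialBackoff(Arrays.asList(1000.0, 50.0));
    // Join selectivity = 1/NDV, as in computeInnerJoinSelectivity.
    System.out.println(1.0 / ndvCrossProduct); // ~1.41e-4
  }
}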
Use of org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException in project hive by apache.
Class HiveIntersectRewriteRule, method onMatch.
// ~ Methods ----------------------------------------------------------------
public void onMatch(RelOptRuleCall call) {
  final HiveIntersect hiveIntersect = call.rel(0);
  final RelOptCluster cluster = hiveIntersect.getCluster();
  final RexBuilder rexBuilder = cluster.getRexBuilder();
  int numOfBranch = hiveIntersect.getInputs().size();
  Builder<RelNode> bldr = new ImmutableList.Builder<RelNode>();
  // 1st level GB: create a GB (col0, col1, count(1) as c) for each branch
  for (int index = 0; index < numOfBranch; index++) {
    RelNode input = hiveIntersect.getInputs().get(index);
    final List<RexNode> gbChildProjLst = Lists.newArrayList();
    final List<Integer> groupSetPositions = Lists.newArrayList();
    for (int cInd = 0; cInd < input.getRowType().getFieldList().size(); cInd++) {
      gbChildProjLst.add(rexBuilder.makeInputRef(input, cInd));
      groupSetPositions.add(cInd);
    }
    gbChildProjLst.add(rexBuilder.makeBigintLiteral(new BigDecimal(1)));
    // Create the project below the GB because we need a new project with the extra column '1'.
    RelNode gbInputRel = null;
    try {
      gbInputRel = HiveProject.create(input, gbChildProjLst, null);
    } catch (CalciteSemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
    // groupSetPositions includes all the positions
    final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
    List<AggregateCall> aggregateCalls = Lists.newArrayList();
    RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
    // count(1); the constant 1 sits at position input.getRowType().getFieldList().size()
    AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster,
        TypeInfoFactory.longTypeInfo, input.getRowType().getFieldList().size(), aggFnRetType);
    aggregateCalls.add(aggregateCall);
    HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
        gbInputRel, false, groupSet, null, aggregateCalls);
    bldr.add(aggregateRel);
  }
  // create a union above all the branches
  HiveRelNode union = new HiveUnion(cluster, TraitsUtil.getDefaultTraitSet(cluster), bldr.build());
  // 2nd level GB: create a GB (col0, col1, count(c)) on top of the union
  final List<Integer> groupSetPositions = Lists.newArrayList();
  // the index of c
  int cInd = union.getRowType().getFieldList().size() - 1;
  for (int index = 0; index < union.getRowType().getFieldList().size(); index++) {
    if (index != cInd) {
      groupSetPositions.add(index);
    }
  }
  List<AggregateCall> aggregateCalls = Lists.newArrayList();
  RelDataType aggFnRetType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory());
  AggregateCall aggregateCall = HiveCalciteUtil.createSingleArgAggCall("count", cluster,
      TypeInfoFactory.longTypeInfo, cInd, aggFnRetType);
  aggregateCalls.add(aggregateCall);
  if (hiveIntersect.all) {
    aggregateCall = HiveCalciteUtil.createSingleArgAggCall("min", cluster,
        TypeInfoFactory.longTypeInfo, cInd, aggFnRetType);
    aggregateCalls.add(aggregateCall);
  }
  final ImmutableBitSet groupSet = ImmutableBitSet.of(groupSetPositions);
  HiveRelNode aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
      union, false, groupSet, null, aggregateCalls);
  // add a filter count(c) = #branches
  int countInd = cInd;
  List<RexNode> childRexNodeLst = new ArrayList<RexNode>();
  RexInputRef ref = rexBuilder.makeInputRef(aggregateRel, countInd);
  RexLiteral literal = rexBuilder.makeBigintLiteral(new BigDecimal(numOfBranch));
  childRexNodeLst.add(ref);
  childRexNodeLst.add(literal);
  ImmutableList.Builder<RelDataType> calciteArgTypesBldr = new ImmutableList.Builder<RelDataType>();
  calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()));
  calciteArgTypesBldr.add(TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()));
  RexNode factoredFilterExpr = null;
  try {
    factoredFilterExpr = rexBuilder.makeCall(
        SqlFunctionConverter.getCalciteFn("=", calciteArgTypesBldr.build(),
            TypeConverter.convert(TypeInfoFactory.longTypeInfo, cluster.getTypeFactory()), true),
        childRexNodeLst);
  } catch (CalciteSemanticException e) {
    LOG.debug(e.toString());
    throw new RuntimeException(e);
  }
  RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), aggregateRel,
      factoredFilterExpr);
  if (!hiveIntersect.all) {
    // the schema for intersect distinct is like this:
    // R3 on all attributes + count(c) as cnt
    // finally add a project to project out the last column
    Set<Integer> projectOutColumnPositions = new HashSet<>();
    projectOutColumnPositions.add(filterRel.getRowType().getFieldList().size() - 1);
    try {
      call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(filterRel, projectOutColumnPositions));
    } catch (CalciteSemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
  } else {
    // the schema for intersect all is like this:
    // R3 + count(c) as cnt + min(c) as m
    // we create an input project for the UDTF whose schema is like this:
    // min(c) as m + R3
    List<RexNode> originalInputRefs = Lists.transform(filterRel.getRowType().getFieldList(),
        new Function<RelDataTypeField, RexNode>() {
          @Override
          public RexNode apply(RelDataTypeField input) {
            return new RexInputRef(input.getIndex(), input.getType());
          }
        });
    List<RexNode> copyInputRefs = new ArrayList<>();
    copyInputRefs.add(originalInputRefs.get(originalInputRefs.size() - 1));
    for (int i = 0; i < originalInputRefs.size() - 2; i++) {
      copyInputRefs.add(originalInputRefs.get(i));
    }
    RelNode srcRel = null;
    try {
      srcRel = HiveProject.create(filterRel, copyInputRefs, null);
      HiveTableFunctionScan udtf = HiveCalciteUtil.createUDTFForSetOp(cluster, srcRel);
      // finally add a project to project out the 1st column
      Set<Integer> projectOutColumnPositions = new HashSet<>();
      projectOutColumnPositions.add(0);
      call.transformTo(HiveCalciteUtil.createProjectWithoutColumn(udtf, projectOutColumnPositions));
    } catch (SemanticException e) {
      LOG.debug(e.toString());
      throw new RuntimeException(e);
    }
  }
}
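Stripped of the Calcite plumbing, the rewrite computes INTERSECT with ordinary aggregation: deduplicate within each branch, union the branches, count how many branches produced each row, and keep rows whose count equals the number of branches. A small in-memory sketch of the same idea (toy data and collections, not Calcite operators):

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

public class IntersectRewriteSketch {
  public static void main(String[] args) {
    List<List<String>> branches = Arrays.asList(
        Arrays.asList("a", "b", "b", "c"),
        Arrays.asList("b", "c", "c", "d"));
    // 1st level GB per branch (the HashSet deduplicates), then the 2nd level
    // GB over the union counts branches per row.
    Map<String, Integer> branchCount = new HashMap<>();
    for (List<String> branch : branches) {
      for (String row : new HashSet<>(branch)) {
        branchCount.merge(row, 1, Integer::sum);
      }
    }
    // The filter count(c) = #branches keeps rows present in every branch.
    branchCount.forEach((row, cnt) -> {
      if (cnt == branches.size()) {
        System.out.println(row); // prints b and c (in hash order)
      }
    });
  }
}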
Use of org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException in project hive by apache.
Class HiveJoinAddNotNullRule, method onMatch.
//~ Methods ----------------------------------------------------------------
@Override
public void onMatch(RelOptRuleCall call) {
  final Join join = call.rel(0);
  RelNode lChild = join.getLeft();
  RelNode rChild = join.getRight();
  HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
  assert registry != null;
  if (join.getJoinType() != JoinRelType.INNER) {
    return;
  }
  if (join.getCondition().isAlwaysTrue()) {
    return;
  }
  JoinPredicateInfo joinPredInfo;
  try {
    joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join);
  } catch (CalciteSemanticException e) {
    return;
  }
  Set<Integer> joinLeftKeyPositions = new HashSet<Integer>();
  Set<Integer> joinRightKeyPositions = new HashSet<Integer>();
  for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) {
    JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo.getEquiJoinPredicateElements().get(i);
    joinLeftKeyPositions.addAll(joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema());
    joinRightKeyPositions.addAll(joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema());
  }
  // Build not null conditions
  final RelOptCluster cluster = join.getCluster();
  final RexBuilder rexBuilder = join.getCluster().getRexBuilder();
  Set<String> leftPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 0));
  final List<RexNode> newLeftConditions = getNotNullConditions(cluster, rexBuilder, join.getLeft(),
      joinLeftKeyPositions, leftPushedPredicates);
  Set<String> rightPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 1));
  final List<RexNode> newRightConditions = getNotNullConditions(cluster, rexBuilder, join.getRight(),
      joinRightKeyPositions, rightPushedPredicates);
  // If no new conditions were generated, there is nothing to add to the plan
  RexNode newLeftPredicate = RexUtil.composeConjunction(rexBuilder, newLeftConditions, false);
  RexNode newRightPredicate = RexUtil.composeConjunction(rexBuilder, newRightConditions, false);
  if (newLeftPredicate.isAlwaysTrue() && newRightPredicate.isAlwaysTrue()) {
    return;
  }
  if (!newLeftPredicate.isAlwaysTrue()) {
    RelNode curr = lChild;
    lChild = filterFactory.createFilter(lChild, newLeftPredicate);
    call.getPlanner().onCopy(curr, lChild);
  }
  if (!newRightPredicate.isAlwaysTrue()) {
    RelNode curr = rChild;
    rChild = filterFactory.createFilter(rChild, newRightPredicate);
    call.getPlanner().onCopy(curr, rChild);
  }
  Join newJoin = join.copy(join.getTraitSet(), join.getCondition(), lChild, rChild, join.getJoinType(),
      join.isSemiJoinDone());
  call.getPlanner().onCopy(join, newJoin);
  // Register information about created predicates
  registry.getPushedPredicates(newJoin, 0).addAll(leftPushedPredicates);
  registry.getPushedPredicates(newJoin, 1).addAll(rightPushedPredicates);
  call.transformTo(newJoin);
}
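getNotNullConditions is not part of this excerpt. Conceptually it emits one IS NOT NULL condition per join-key position, and the pushed-predicate string set from the registry is what keeps the rule from firing repeatedly on the same join. A toy sketch of that build-and-dedup step, with string predicates standing in for RexNodes (all names here are illustrative):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class NotNullConditionsSketch {
  /** Build "$pos IS NOT NULL" conditions, skipping ones already pushed. */
  static List<String> notNullConditions(Set<Integer> keyPositions, Set<String> pushed) {
    List<String> conditions = new ArrayList<>();
    for (int pos : keyPositions) {
      String pred = "$" + pos + " IS NOT NULL";
      if (pushed.add(pred)) { // add() returns false if already registered
        conditions.add(pred);
      }
    }
    return conditions;
  }

  public static void main(String[] args) {
    // $0 was pushed by an earlier firing; only the $2 key yields a new filter.
    Set<String> pushed = new HashSet<>(Arrays.asList("$0 IS NOT NULL"));
    System.out.println(notNullConditions(new HashSet<>(Arrays.asList(0, 2)), pushed));
  }
}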