use of org.apache.calcite.rel.type.RelDataTypeFactory in project hive by apache.
the class HiveAggregateReduceFunctionsRule method reduceStddev.
private RexNode reduceStddev(Aggregate oldAggRel, AggregateCall oldCall, boolean biased, boolean sqrt, List<AggregateCall> newCalls, Map<AggregateCall, RexNode> aggCallMapping, List<RexNode> inputExprs) {
// stddev_pop(x) ==>
// power(
// (sum(x * x) - sum(x) * sum(x) / count(x))
// / count(x),
// .5)
//
// stddev_samp(x) ==>
// power(
// (sum(x * x) - sum(x) * sum(x) / count(x))
// / nullif(count(x) - 1, 0),
// .5)
final int nGroups = oldAggRel.getGroupCount();
final RelOptCluster cluster = oldAggRel.getCluster();
final RexBuilder rexBuilder = cluster.getRexBuilder();
final RelDataTypeFactory typeFactory = cluster.getTypeFactory();
assert oldCall.getArgList().size() == 1 : oldCall.getArgList();
final int argOrdinal = oldCall.getArgList().get(0);
final RelDataType argOrdinalType = getFieldType(oldAggRel.getInput(), argOrdinal);
final RelDataType oldCallType = typeFactory.createTypeWithNullability(oldCall.getType(), true);
final RexNode argRef = rexBuilder.ensureType(oldCallType, inputExprs.get(argOrdinal), false);
final int argRefOrdinal = lookupOrAdd(inputExprs, argRef);
final RelDataType sumReturnType = getSumReturnType(rexBuilder.getTypeFactory(), argRef.getType());
final RexNode argSquared = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, argRef, argRef);
final int argSquaredOrdinal = lookupOrAdd(inputExprs, argSquared);
final RelDataType sumSquaredReturnType = getSumReturnType(rexBuilder.getTypeFactory(), argSquared.getType());
final AggregateCall sumArgSquaredAggCall = createAggregateCallWithBinding(typeFactory, new HiveSqlSumAggFunction(oldCall.isDistinct(), ReturnTypes.explicit(sumSquaredReturnType), InferTypes.explicit(Collections.singletonList(argSquared.getType())), // SqlStdOperatorTable.SUM,
oldCall.getAggregation().getOperandTypeChecker()), argSquared.getType(), oldAggRel, oldCall, argSquaredOrdinal);
final RexNode sumArgSquared = rexBuilder.addAggCall(sumArgSquaredAggCall, nGroups, oldAggRel.indicator, newCalls, aggCallMapping, ImmutableList.of(sumArgSquaredAggCall.getType()));
final AggregateCall sumArgAggCall = AggregateCall.create(new HiveSqlSumAggFunction(oldCall.isDistinct(), ReturnTypes.explicit(sumReturnType), InferTypes.explicit(Collections.singletonList(argOrdinalType)), // SqlStdOperatorTable.SUM,
oldCall.getAggregation().getOperandTypeChecker()), oldCall.isDistinct(), oldCall.isApproximate(), ImmutableIntList.of(argRefOrdinal), oldCall.filterArg, oldAggRel.getGroupCount(), oldAggRel.getInput(), null, null);
final RexNode sumArg = rexBuilder.addAggCall(sumArgAggCall, nGroups, oldAggRel.indicator, newCalls, aggCallMapping, ImmutableList.of(sumArgAggCall.getType()));
final RexNode sumArgCast = rexBuilder.ensureType(oldCallType, sumArg, true);
final RexNode sumSquaredArg = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, sumArgCast, sumArgCast);
RelDataType countRetType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BIGINT), true);
final AggregateCall countArgAggCall = AggregateCall.create(new HiveSqlCountAggFunction(oldCall.isDistinct(), ReturnTypes.explicit(countRetType), oldCall.getAggregation().getOperandTypeInference(), // SqlStdOperatorTable.COUNT,
oldCall.getAggregation().getOperandTypeChecker()), oldCall.isDistinct(), oldCall.isApproximate(), oldCall.getArgList(), oldCall.filterArg, oldAggRel.getGroupCount(), oldAggRel.getInput(), countRetType, null);
final RexNode countArg = rexBuilder.addAggCall(countArgAggCall, nGroups, oldAggRel.indicator, newCalls, aggCallMapping, ImmutableList.of(argOrdinalType));
final RexNode avgSumSquaredArg = rexBuilder.makeCall(SqlStdOperatorTable.DIVIDE, sumSquaredArg, countArg);
final RexNode diff = rexBuilder.makeCall(SqlStdOperatorTable.MINUS, sumArgSquared, avgSumSquaredArg);
final RexNode denominator;
if (biased) {
denominator = countArg;
} else {
final RexLiteral one = rexBuilder.makeExactLiteral(BigDecimal.ONE);
final RexNode nul = rexBuilder.makeCast(countArg.getType(), rexBuilder.constantNull());
final RexNode countMinusOne = rexBuilder.makeCall(SqlStdOperatorTable.MINUS, countArg, one);
final RexNode countEqOne = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, countArg, one);
denominator = rexBuilder.makeCall(SqlStdOperatorTable.CASE, countEqOne, nul, countMinusOne);
}
final RexNode div = rexBuilder.makeCall(SqlStdOperatorTable.DIVIDE, diff, denominator);
RexNode result = div;
if (sqrt) {
final RexNode half = rexBuilder.makeExactLiteral(new BigDecimal("0.5"));
result = rexBuilder.makeCall(SqlStdOperatorTable.POWER, div, half);
}
return rexBuilder.makeCast(oldCall.getType(), result);
}
use of org.apache.calcite.rel.type.RelDataTypeFactory in project hive by apache.
the class HiveAggregate method deriveRowType.
public static RelDataType deriveRowType(RelDataTypeFactory typeFactory, final RelDataType inputRowType, boolean indicator, ImmutableBitSet groupSet, List<ImmutableBitSet> groupSets, final List<AggregateCall> aggCalls) {
final List<Integer> groupList = groupSet.asList();
assert groupList.size() == groupSet.cardinality();
final RelDataTypeFactory.FieldInfoBuilder builder = typeFactory.builder();
final List<RelDataTypeField> fieldList = inputRowType.getFieldList();
final Set<String> containedNames = Sets.newHashSet();
for (int groupKey : groupList) {
containedNames.add(fieldList.get(groupKey).getName());
builder.add(fieldList.get(groupKey));
}
if (indicator) {
for (int groupKey : groupList) {
final RelDataType booleanType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BOOLEAN), false);
String name = "i$" + fieldList.get(groupKey).getName();
int i = 0;
while (containedNames.contains(name)) {
name += "_" + i++;
}
containedNames.add(name);
builder.add(name, booleanType);
}
}
for (Ord<AggregateCall> aggCall : Ord.zip(aggCalls)) {
String name;
if (aggCall.e.name != null) {
name = aggCall.e.name;
} else {
name = "$f" + (groupList.size() + aggCall.i);
}
int i = 0;
while (containedNames.contains(name)) {
name += "_" + i++;
}
containedNames.add(name);
builder.add(name, aggCall.e.type);
}
return builder.build();
}
use of org.apache.calcite.rel.type.RelDataTypeFactory in project flink by apache.
the class FlinkAggregateExpandDistinctAggregatesRule method rewriteUsingGroupingSets.
/*
public RelBuilder convertSingletonDistinct(RelBuilder relBuilder,
Aggregate aggregate, Set<Pair<List<Integer>, Integer>> argLists) {
// For example,
// SELECT deptno, COUNT(*), SUM(bonus), MIN(DISTINCT sal)
// FROM emp
// GROUP BY deptno
//
// becomes
//
// SELECT deptno, SUM(cnt), SUM(bonus), MIN(sal)
// FROM (
// SELECT deptno, COUNT(*) as cnt, SUM(bonus), sal
// FROM EMP
// GROUP BY deptno, sal) // Aggregate B
// GROUP BY deptno // Aggregate A
relBuilder.push(aggregate.getInput());
final List<Pair<RexNode, String>> projects = new ArrayList<>();
final Map<Integer, Integer> sourceOf = new HashMap<>();
SortedSet<Integer> newGroupSet = new TreeSet<>();
final List<RelDataTypeField> childFields =
relBuilder.peek().getRowType().getFieldList();
final boolean hasGroupBy = aggregate.getGroupSet().size() > 0;
// Add the distinct aggregate column(s) to the group-by columns,
// if not already a part of the group-by
newGroupSet.addAll(aggregate.getGroupSet().asList());
for (Pair<List<Integer>, Integer> argList : argLists) {
newGroupSet.addAll(argList.getKey());
}
// Re-map the arguments to the aggregate A. These arguments will get
// remapped because of the intermediate aggregate B generated as part of the
// transformation.
for (int arg : newGroupSet) {
sourceOf.put(arg, projects.size());
projects.add(RexInputRef.of2(arg, childFields));
}
// Generate the intermediate aggregate B
final List<AggregateCall> aggCalls = aggregate.getAggCallList();
final List<AggregateCall> newAggCalls = new ArrayList<>();
final List<Integer> fakeArgs = new ArrayList<>();
final Map<AggregateCall, Integer> callArgMap = new HashMap<>();
// First identify the real arguments, then use the rest for fake arguments
// e.g. if real arguments are 0, 1, 3. Then the fake arguments will be 2, 4
for (final AggregateCall aggCall : aggCalls) {
if (!aggCall.isDistinct()) {
for (int arg : aggCall.getArgList()) {
if (!sourceOf.containsKey(arg)) {
sourceOf.put(arg, projects.size());
}
}
}
}
int fakeArg0 = 0;
for (final AggregateCall aggCall : aggCalls) {
// We will deal with non-distinct aggregates below
if (!aggCall.isDistinct()) {
boolean isGroupKeyUsedInAgg = false;
for (int arg : aggCall.getArgList()) {
if (sourceOf.containsKey(arg)) {
isGroupKeyUsedInAgg = true;
break;
}
}
if (aggCall.getArgList().size() == 0 || isGroupKeyUsedInAgg) {
while (sourceOf.get(fakeArg0) != null) {
++fakeArg0;
}
fakeArgs.add(fakeArg0);
}
}
}
for (final AggregateCall aggCall : aggCalls) {
if (!aggCall.isDistinct()) {
for (int arg : aggCall.getArgList()) {
if (!sourceOf.containsKey(arg)) {
sourceOf.remove(arg);
}
}
}
}
// Compute the remapped arguments using fake arguments for non-distinct
// aggregates with no arguments e.g. count(*).
int fakeArgIdx = 0;
for (final AggregateCall aggCall : aggCalls) {
// Project the column corresponding to the distinct aggregate. Project
// as-is all the non-distinct aggregates
if (!aggCall.isDistinct()) {
final AggregateCall newCall =
AggregateCall.create(aggCall.getAggregation(), false,
aggCall.getArgList(), -1,
ImmutableBitSet.of(newGroupSet).cardinality(),
relBuilder.peek(), null, aggCall.name);
newAggCalls.add(newCall);
if (newCall.getArgList().size() == 0) {
int fakeArg = fakeArgs.get(fakeArgIdx);
callArgMap.put(newCall, fakeArg);
sourceOf.put(fakeArg, projects.size());
projects.add(
Pair.of((RexNode) new RexInputRef(fakeArg, newCall.getType()),
newCall.getName()));
++fakeArgIdx;
} else {
for (int arg : newCall.getArgList()) {
if (sourceOf.containsKey(arg)) {
int fakeArg = fakeArgs.get(fakeArgIdx);
callArgMap.put(newCall, fakeArg);
sourceOf.put(fakeArg, projects.size());
projects.add(
Pair.of((RexNode) new RexInputRef(fakeArg, newCall.getType()),
newCall.getName()));
++fakeArgIdx;
} else {
sourceOf.put(arg, projects.size());
projects.add(
Pair.of((RexNode) new RexInputRef(arg, newCall.getType()),
newCall.getName()));
}
}
}
}
}
// Generate the aggregate B (see the reference example above)
relBuilder.push(
aggregate.copy(
aggregate.getTraitSet(), relBuilder.build(),
false, ImmutableBitSet.of(newGroupSet), null, newAggCalls));
// Convert the existing aggregate to aggregate A (see the reference example above)
final List<AggregateCall> newTopAggCalls =
Lists.newArrayList(aggregate.getAggCallList());
// Use the remapped arguments for the (non)distinct aggregate calls
for (int i = 0; i < newTopAggCalls.size(); i++) {
// Re-map arguments.
final AggregateCall aggCall = newTopAggCalls.get(i);
final int argCount = aggCall.getArgList().size();
final List<Integer> newArgs = new ArrayList<>(argCount);
final AggregateCall newCall;
for (int j = 0; j < argCount; j++) {
final Integer arg = aggCall.getArgList().get(j);
if (callArgMap.containsKey(aggCall)) {
newArgs.add(sourceOf.get(callArgMap.get(aggCall)));
}
else {
newArgs.add(sourceOf.get(arg));
}
}
if (aggCall.isDistinct()) {
newCall =
AggregateCall.create(aggCall.getAggregation(), false, newArgs,
-1, aggregate.getGroupSet().cardinality(), relBuilder.peek(),
aggCall.getType(), aggCall.name);
} else {
// If aggregate B had a COUNT aggregate call the corresponding aggregate at
// aggregate A must be SUM. For other aggregates, it remains the same.
if (aggCall.getAggregation() instanceof SqlCountAggFunction) {
if (aggCall.getArgList().size() == 0) {
newArgs.add(sourceOf.get(callArgMap.get(aggCall)));
}
if (hasGroupBy) {
SqlSumAggFunction sumAgg = new SqlSumAggFunction(null);
newCall =
AggregateCall.create(sumAgg, false, newArgs, -1,
aggregate.getGroupSet().cardinality(), relBuilder.peek(),
aggCall.getType(), aggCall.getName());
} else {
SqlSumEmptyIsZeroAggFunction sumAgg = new SqlSumEmptyIsZeroAggFunction();
newCall =
AggregateCall.create(sumAgg, false, newArgs, -1,
aggregate.getGroupSet().cardinality(), relBuilder.peek(),
aggCall.getType(), aggCall.getName());
}
} else {
newCall =
AggregateCall.create(aggCall.getAggregation(), false, newArgs, -1,
aggregate.getGroupSet().cardinality(),
relBuilder.peek(), aggCall.getType(), aggCall.name);
}
}
newTopAggCalls.set(i, newCall);
}
// Populate the group-by keys with the remapped arguments for aggregate A
newGroupSet.clear();
for (int arg : aggregate.getGroupSet()) {
newGroupSet.add(sourceOf.get(arg));
}
relBuilder.push(
aggregate.copy(aggregate.getTraitSet(),
relBuilder.build(), aggregate.indicator,
ImmutableBitSet.of(newGroupSet), null, newTopAggCalls));
return relBuilder;
}
*/
@SuppressWarnings("DanglingJavadoc")
private void rewriteUsingGroupingSets(RelOptRuleCall call, Aggregate aggregate, Set<Pair<List<Integer>, Integer>> argLists) {
final Set<ImmutableBitSet> groupSetTreeSet = new TreeSet<>(ImmutableBitSet.ORDERING);
groupSetTreeSet.add(aggregate.getGroupSet());
for (Pair<List<Integer>, Integer> argList : argLists) {
groupSetTreeSet.add(ImmutableBitSet.of(argList.left).setIf(argList.right, argList.right >= 0).union(aggregate.getGroupSet()));
}
final ImmutableList<ImmutableBitSet> groupSets = ImmutableList.copyOf(groupSetTreeSet);
final ImmutableBitSet fullGroupSet = ImmutableBitSet.union(groupSets);
final List<AggregateCall> distinctAggCalls = new ArrayList<>();
for (Pair<AggregateCall, String> aggCall : aggregate.getNamedAggCalls()) {
if (!aggCall.left.isDistinct()) {
distinctAggCalls.add(aggCall.left.rename(aggCall.right));
}
}
final RelBuilder relBuilder = call.builder();
relBuilder.push(aggregate.getInput());
relBuilder.aggregate(relBuilder.groupKey(fullGroupSet, groupSets.size() > 1, groupSets), distinctAggCalls);
final RelNode distinct = relBuilder.peek();
final int groupCount = fullGroupSet.cardinality();
final int indicatorCount = groupSets.size() > 1 ? groupCount : 0;
final RelOptCluster cluster = aggregate.getCluster();
final RexBuilder rexBuilder = cluster.getRexBuilder();
final RelDataTypeFactory typeFactory = cluster.getTypeFactory();
final RelDataType booleanType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BOOLEAN), false);
final List<Pair<RexNode, String>> predicates = new ArrayList<>();
final Map<ImmutableBitSet, Integer> filters = new HashMap<>();
/** Function to register a filter for a group set. */
class Registrar {
RexNode group = null;
private int register(ImmutableBitSet groupSet) {
if (group == null) {
group = makeGroup(groupCount - 1);
}
final RexNode node = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, group, rexBuilder.makeExactLiteral(toNumber(remap(fullGroupSet, groupSet))));
predicates.add(Pair.of(node, toString(groupSet)));
return groupCount + indicatorCount + distinctAggCalls.size() + predicates.size() - 1;
}
private RexNode makeGroup(int i) {
final RexInputRef ref = rexBuilder.makeInputRef(booleanType, groupCount + i);
final RexNode kase = rexBuilder.makeCall(SqlStdOperatorTable.CASE, ref, rexBuilder.makeExactLiteral(BigDecimal.ZERO), rexBuilder.makeExactLiteral(TWO.pow(i)));
if (i == 0) {
return kase;
} else {
return rexBuilder.makeCall(SqlStdOperatorTable.PLUS, makeGroup(i - 1), kase);
}
}
private BigDecimal toNumber(ImmutableBitSet bitSet) {
BigDecimal n = BigDecimal.ZERO;
for (int key : bitSet) {
n = n.add(TWO.pow(key));
}
return n;
}
private String toString(ImmutableBitSet bitSet) {
final StringBuilder buf = new StringBuilder("$i");
for (int key : bitSet) {
buf.append(key).append('_');
}
return buf.substring(0, buf.length() - 1);
}
}
final Registrar registrar = new Registrar();
for (ImmutableBitSet groupSet : groupSets) {
filters.put(groupSet, registrar.register(groupSet));
}
if (!predicates.isEmpty()) {
List<Pair<RexNode, String>> nodes = new ArrayList<>();
for (RelDataTypeField f : relBuilder.peek().getRowType().getFieldList()) {
final RexNode node = rexBuilder.makeInputRef(f.getType(), f.getIndex());
nodes.add(Pair.of(node, f.getName()));
}
nodes.addAll(predicates);
relBuilder.project(Pair.left(nodes), Pair.right(nodes));
}
int x = groupCount + indicatorCount;
final List<AggregateCall> newCalls = new ArrayList<>();
for (AggregateCall aggCall : aggregate.getAggCallList()) {
final int newFilterArg;
final List<Integer> newArgList;
final SqlAggFunction aggregation;
if (!aggCall.isDistinct()) {
aggregation = SqlStdOperatorTable.MIN;
newArgList = ImmutableIntList.of(x++);
newFilterArg = filters.get(aggregate.getGroupSet());
} else {
aggregation = aggCall.getAggregation();
newArgList = remap(fullGroupSet, aggCall.getArgList());
newFilterArg = filters.get(ImmutableBitSet.of(aggCall.getArgList()).setIf(aggCall.filterArg, aggCall.filterArg >= 0).union(aggregate.getGroupSet()));
}
final AggregateCall newCall = AggregateCall.create(aggregation, false, newArgList, newFilterArg, aggregate.getGroupCount(), distinct, null, aggCall.name);
newCalls.add(newCall);
}
relBuilder.aggregate(relBuilder.groupKey(remap(fullGroupSet, aggregate.getGroupSet()), aggregate.indicator, remap(fullGroupSet, aggregate.getGroupSets())), newCalls);
relBuilder.convert(aggregate.getRowType(), true);
call.transformTo(relBuilder.build());
}
use of org.apache.calcite.rel.type.RelDataTypeFactory in project drill by apache.
the class ConvertHiveParquetScanToDrillParquetScan method createNativeScanRel.
/**
* Helper method which creates a DrillScalRel with native HiveScan.
*/
private DrillScanRel createNativeScanRel(final Map<String, String> partitionColMapping, final DrillScanRel hiveScanRel) throws Exception {
final RelDataTypeFactory typeFactory = hiveScanRel.getCluster().getTypeFactory();
final RelDataType varCharType = typeFactory.createSqlType(SqlTypeName.VARCHAR);
final List<String> nativeScanColNames = Lists.newArrayList();
final List<RelDataType> nativeScanColTypes = Lists.newArrayList();
for (RelDataTypeField field : hiveScanRel.getRowType().getFieldList()) {
final String dirColName = partitionColMapping.get(field.getName());
if (dirColName != null) {
// partition column
nativeScanColNames.add(dirColName);
nativeScanColTypes.add(varCharType);
} else {
nativeScanColNames.add(field.getName());
nativeScanColTypes.add(field.getType());
}
}
final RelDataType nativeScanRowType = typeFactory.createStructType(nativeScanColTypes, nativeScanColNames);
// Create the list of projected columns set in HiveScan. The order of this list may not be same as the order of
// columns in HiveScan row type. Note: If the HiveScan.getColumn() contains a '*', we just need to add it as it is,
// unlike above where we expanded the '*'. HiveScan and related (subscan) can handle '*'.
final List<SchemaPath> nativeScanCols = Lists.newArrayList();
for (SchemaPath colName : hiveScanRel.getColumns()) {
final String partitionCol = partitionColMapping.get(colName.getAsUnescapedPath());
if (partitionCol != null) {
nativeScanCols.add(SchemaPath.getSimplePath(partitionCol));
} else {
nativeScanCols.add(colName);
}
}
final HiveScan hiveScan = (HiveScan) hiveScanRel.getGroupScan();
final HiveDrillNativeParquetScan nativeHiveScan = new HiveDrillNativeParquetScan(hiveScan.getUserName(), hiveScan.hiveReadEntry, hiveScan.storagePlugin, nativeScanCols, null);
return new DrillScanRel(hiveScanRel.getCluster(), hiveScanRel.getTraitSet(), hiveScanRel.getTable(), nativeHiveScan, nativeScanRowType, nativeScanCols);
}
use of org.apache.calcite.rel.type.RelDataTypeFactory in project drill by apache.
the class DrillExtractConvertlet method convertCall.
/*
* Custom convertlet to handle extract functions. Optiq rewrites
* extract functions as divide and modulo functions, based on the
* data type. We cannot do that in Drill since we don't know the data type
* till we start scanning. So we don't rewrite extract and treat it as
* a regular function.
*/
@Override
public RexNode convertCall(SqlRexContext cx, SqlCall call) {
final RexBuilder rexBuilder = cx.getRexBuilder();
final List<SqlNode> operands = call.getOperandList();
final List<RexNode> exprs = new LinkedList<>();
String timeUnit = ((SqlIntervalQualifier) operands.get(0)).timeUnitRange.toString();
RelDataTypeFactory typeFactory = cx.getTypeFactory();
for (SqlNode node : operands) {
exprs.add(cx.convertExpression(node));
}
final RelDataType returnType;
if (call.getOperator() == SqlStdOperatorTable.EXTRACT) {
// Legacy code:
// The return type is wrong!
// Legacy code choose SqlTypeName.BIGINT simply to avoid conflicting against Calcite's inference mechanism
// (, which chose BIGINT in validation phase already)
// Determine NULL-able using 2nd argument's Null-able.
returnType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BIGINT), exprs.get(1).getType().isNullable());
} else {
// Determine NULL-able using 2nd argument's Null-able.
returnType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(TypeInferenceUtils.getSqlTypeNameForTimeUnit(timeUnit)), exprs.get(1).getType().isNullable());
}
return rexBuilder.makeCall(returnType, call.getOperator(), exprs);
}
Aggregations