use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Aggregate in project beam by apache.
the class BeamAggregateProjectMergeRule, method getUnderlyingIO.
/**
 * The following scenarios are possible:<br>
 * 1) Aggregate <- Project <- IO.<br>
 * 2) Aggregate <- Project <- Chain of Project/Filter <- IO.<br>
 * 3) Aggregate <- Project <- Something else.<br>
 * 4) Aggregate <- Project <- Chain of Project/Filter <- Something else.
 *
 * @param visitedNodes nodes already examined, so the same node is never visited twice.
 * @param parent the Project that matched this rule.
 * @return the {@code BeamIOSourceRel} when one is present, or null when some other {@code
 *     RelNode} is present instead.
 */
private BeamIOSourceRel getUnderlyingIO(Set<RelNode> visitedNodes, SingleRel parent) {
  // No need to look at the same node more than once.
  if (visitedNodes.contains(parent)) {
    return null;
  }
  visitedNodes.add(parent);
  List<RelNode> nodes = ((RelSubset) parent.getInput()).getRelList();
  for (RelNode node : nodes) {
    if (node instanceof Filter || node instanceof Project) {
      // Search this node's inputs for an IO.
      BeamIOSourceRel child = getUnderlyingIO(visitedNodes, (SingleRel) node);
      if (child != null) {
        return child;
      }
    } else if (node instanceof BeamIOSourceRel) {
      return (BeamIOSourceRel) node;
    }
  }
  return null;
}
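For orientation, here is a minimal, hedged sketch of how a rule shaped like this one could invoke the helper from onMatch, assuming the conventional Calcite operand ordering (Aggregate over Project); the final decision step is an illustrative placeholder, not the rule's actual push-down logic.

// Sketch only: assumed caller wiring, not the actual BeamAggregateProjectMergeRule.onMatch body.
@Override
public void onMatch(RelOptRuleCall call) {
  final Aggregate aggregate = call.rel(0); // matched Aggregate (operand 0, assumed ordering)
  final Project project = call.rel(1); // matched Project (operand 1, assumed ordering)
  // Start the search from the matched Project with a fresh visited set.
  BeamIOSourceRel io = getUnderlyingIO(new HashSet<>(), project);
  if (io == null) {
    // Scenarios 3 and 4: something other than an IO sits underneath; leave the plan alone.
    return;
  }
  // Scenarios 1 and 2: an IO was found; here the real rule would decide whether merging the
  // Aggregate and Project above it is beneficial (placeholder, details omitted).
}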
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Aggregate in project beam by apache.
the class BeamZetaSqlCatalog, method addFunction.
void addFunction(ResolvedNodes.ResolvedCreateFunctionStmt createFunctionStmt) {
  String functionGroup = getFunctionGroup(createFunctionStmt);
  switch (functionGroup) {
    case USER_DEFINED_SQL_FUNCTIONS:
      sqlScalarUdfs.put(createFunctionStmt.getNamePath(), createFunctionStmt);
      break;
    case USER_DEFINED_JAVA_SCALAR_FUNCTIONS:
      String functionName = String.join(".", createFunctionStmt.getNamePath());
      for (FunctionArgumentType argumentType :
          createFunctionStmt.getSignature().getFunctionArgumentList()) {
        Type type = argumentType.getType();
        if (type == null) {
          throw new UnsupportedOperationException(
              "UDF templated argument types are not supported.");
        }
        validateJavaUdfZetaSqlType(type, functionName);
      }
      if (createFunctionStmt.getReturnType() == null) {
        throw new IllegalArgumentException("UDF return type must not be null.");
      }
      validateJavaUdfZetaSqlType(createFunctionStmt.getReturnType(), functionName);
      String jarPath = getJarPath(createFunctionStmt);
      ScalarFn scalarFn =
          javaUdfLoader.loadScalarFunction(createFunctionStmt.getNamePath(), jarPath);
      Method method = ScalarFnReflector.getApplyMethod(scalarFn);
      javaScalarUdfs.put(
          createFunctionStmt.getNamePath(),
          UserFunctionDefinitions.JavaScalarFunction.create(method, jarPath));
      break;
    case USER_DEFINED_JAVA_AGGREGATE_FUNCTIONS:
      jarPath = getJarPath(createFunctionStmt);
      // Try loading the aggregate function just to make sure it exists. LazyAggregateCombineFn
      // will need to fetch it again at runtime.
      javaUdfLoader.loadAggregateFunction(createFunctionStmt.getNamePath(), jarPath);
      Combine.CombineFn<?, ?, ?> combineFn =
          new LazyAggregateCombineFn<>(createFunctionStmt.getNamePath(), jarPath);
      javaUdafs.put(createFunctionStmt.getNamePath(), combineFn);
      break;
    default:
      throw new IllegalArgumentException(
          String.format("Encountered unrecognized function group %s.", functionGroup));
  }
  zetaSqlCatalog.addFunction(
      new Function(
          createFunctionStmt.getNamePath(),
          functionGroup,
          createFunctionStmt.getIsAggregate()
              ? ZetaSQLFunctions.FunctionEnums.Mode.AGGREGATE
              : ZetaSQLFunctions.FunctionEnums.Mode.SCALAR,
          ImmutableList.of(createFunctionStmt.getSignature())));
}
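The USER_DEFINED_JAVA_SCALAR_FUNCTIONS branch expects the jar located by getJarPath to contain a ScalarFn whose apply method ScalarFnReflector can find. Below is a minimal sketch of such a UDF, assuming Beam's ScalarFn base class and @ApplyMethod annotation; the class name IncrementFn is hypothetical.

import org.apache.beam.sdk.extensions.sql.udf.ScalarFn;

// Hypothetical scalar UDF packaged into the jar referenced by the CREATE FUNCTION statement.
// ScalarFnReflector.getApplyMethod locates the method annotated with @ApplyMethod, and that
// Method together with the jar path is what ends up registered in javaScalarUdfs above.
public class IncrementFn extends ScalarFn {
  @ApplyMethod
  public Long increment(Long i) {
    return i + 1;
  }
}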
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Aggregate in project beam by apache.
the class BeamAggregationRel, method beamComputeSelfCost.
@Override
public BeamCostModel beamComputeSelfCost(RelOptPlanner planner, BeamRelMetadataQuery mq) {
  NodeStats inputStat = BeamSqlRelUtils.getNodeStats(this.input, mq);
  inputStat = computeWindowingCostEffect(inputStat);
  // Aggregates with more aggregate functions cost a bit more.
  float multiplier = 1f + (float) aggCalls.size() * 0.125f;
  for (AggregateCall aggCall : aggCalls) {
    if (aggCall.getAggregation().getName().equals("SUM")) {
      // Pretend that SUM costs a little bit more than $SUM0, to make things deterministic.
      multiplier += 0.0125f;
    }
  }
  return BeamCostModel.FACTORY.makeCost(
      inputStat.getRowCount() * multiplier, inputStat.getRate() * multiplier);
}
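To make the cost formula concrete: an aggregation with three aggregate calls, one of them a SUM, gets multiplier 1 + 3 × 0.125 + 0.0125 = 1.3875, so both the row count and the rate handed to makeCost come out roughly 39% above the windowing-adjusted input statistics.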
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Aggregate in project beam by apache.
the class ExpressionConverter, method convertRexNodeFromComputedColumnWithFieldList.
private RexNode convertRexNodeFromComputedColumnWithFieldList(
    ResolvedComputedColumn column,
    List<ResolvedColumn> columnList,
    List<RelDataTypeField> fieldList,
    int windowFieldIndex) {
  if (column.getExpr().nodeKind() != RESOLVED_FUNCTION_CALL) {
    return convertRexNodeFromResolvedExpr(
        column.getExpr(), columnList, fieldList, ImmutableMap.of());
  }
  ResolvedFunctionCall functionCall = (ResolvedFunctionCall) column.getExpr();
  // TODO: is there any other illegal case?
  if (functionCall.getFunction().getName().equals(FIXED_WINDOW)
      || functionCall.getFunction().getName().equals(SLIDING_WINDOW)
      || functionCall.getFunction().getName().equals(SESSION_WINDOW)) {
    throw new ZetaSqlException(
        functionCall.getFunction().getName() + " shouldn't appear in SELECT exprlist.");
  }
  if (!functionCall.getFunction().getGroup().equals(PRE_DEFINED_WINDOW_FUNCTIONS)) {
    // A non-window function should still go through the normal FunctionCall conversion process.
    return convertRexNodeFromResolvedExpr(
        column.getExpr(), columnList, fieldList, ImmutableMap.of());
  }
  // ONLY window_start and window_end should arrive here.
  // TODO: add extra verification here to make sure window start/end functions have the same
  // parameters as the window function.
  List<RexNode> operands = new ArrayList<>();
  switch (functionCall.getFunction().getName()) {
    case FIXED_WINDOW_START:
    case SLIDING_WINDOW_START:
    case SESSION_WINDOW_START:
      // Session window start falls through: in Calcite it maps to the same window field
      // reference as the session window end.
    case SESSION_WINDOW_END:
      return rexBuilder()
          .makeInputRef(fieldList.get(windowFieldIndex).getType(), windowFieldIndex);
    case FIXED_WINDOW_END:
      operands.add(
          rexBuilder().makeInputRef(fieldList.get(windowFieldIndex).getType(), windowFieldIndex));
      // TODO: check that window_end's duration is the same as its aggregate window.
      operands.add(
          convertIntervalToRexIntervalLiteral(
              (ResolvedLiteral) functionCall.getArgumentList().get(0)));
      return rexBuilder().makeCall(SqlOperators.ZETASQL_TIMESTAMP_ADD, operands);
    case SLIDING_WINDOW_END:
      operands.add(
          rexBuilder().makeInputRef(fieldList.get(windowFieldIndex).getType(), windowFieldIndex));
      operands.add(
          convertIntervalToRexIntervalLiteral(
              (ResolvedLiteral) functionCall.getArgumentList().get(1)));
      return rexBuilder().makeCall(SqlOperators.ZETASQL_TIMESTAMP_ADD, operands);
    default:
      throw new UnsupportedOperationException(
          "Does not support window start/end: " + functionCall.getFunction().getName());
  }
}
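Reading the window branches concretely: the *_START cases (and SESSION_WINDOW_END) resolve to a plain input reference to the window field, while FIXED_WINDOW_END and SLIDING_WINDOW_END are emitted as ZETASQL_TIMESTAMP_ADD(window_field, interval), the interval literal coming from argument 0 of the call for fixed windows and argument 1 for sliding windows. So, illustratively, with a window field of 12:00:00 and a one-minute fixed window, the resulting expression evaluates to 12:01:00.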
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Aggregate in project beam by apache.
the class AggregateScanConverter, method convertAggregateScanInputScanToLogicalProject.
private LogicalProject convertAggregateScanInputScanToLogicalProject(
    ResolvedAggregateScan node, RelNode input) {
  // AggregateScan's input is the source of data (e.g. TableScan), which is different from the
  // design of CalciteSQL, in which the LogicalAggregate's input is a LogicalProject, whose input
  // is a LogicalTableScan. When AggregateScan's input is a WithRefScan, the WithRefScan is
  // equivalent to a LogicalTableScan. So it's still required to build another LogicalProject as
  // the input of LogicalAggregate.
  List<RexNode> projects = new ArrayList<>();
  List<String> fieldNames = new ArrayList<>();

  // Convert the GROUP BY expressions into projects that feed the LogicalAggregate.
  for (ResolvedComputedColumn computedColumn : node.getGroupByList()) {
    projects.add(
        getExpressionConverter()
            .convertRexNodeFromResolvedExpr(
                computedColumn.getExpr(),
                node.getInputScan().getColumnList(),
                input.getRowType().getFieldList(),
                ImmutableMap.of()));
    fieldNames.add(getTrait().resolveAlias(computedColumn.getColumn()));
  }

  // TODO: remove duplicate columns in projects.
  for (ResolvedComputedColumn resolvedComputedColumn : node.getAggregateList()) {
    // Should create Calcite's RexInputRef from the ResolvedColumn of the ResolvedComputedColumn.
    // TODO: handle aggregate functions with more than one argument and handle OVER.
    // TODO: is there a general way to do column reference tracking and deduplication for
    // aggregation?
    ResolvedAggregateFunctionCall aggregateFunctionCall =
        (ResolvedAggregateFunctionCall) resolvedComputedColumn.getExpr();
    if (aggregateFunctionCall.getArgumentList() != null
        && aggregateFunctionCall.getArgumentList().size() >= 1) {
      ResolvedExpr resolvedExpr = aggregateFunctionCall.getArgumentList().get(0);
      for (int i = 0; i < aggregateFunctionCall.getArgumentList().size(); i++) {
        if (i == 0) {
          // TODO: this assumes the aggregate function's input is either a ColumnRef or a
          // cast(ColumnRef); a user might chain multiple CASTs, so that rare case still needs
          // handling.
          projects.add(
              getExpressionConverter()
                  .convertRexNodeFromResolvedExpr(
                      resolvedExpr,
                      node.getInputScan().getColumnList(),
                      input.getRowType().getFieldList(),
                      ImmutableMap.of()));
        } else {
          projects.add(
              getExpressionConverter()
                  .convertRexNodeFromResolvedExpr(
                      aggregateFunctionCall.getArgumentList().get(i)));
        }
        fieldNames.add(getTrait().resolveAlias(resolvedComputedColumn.getColumn()));
      }
    }
  }
  return LogicalProject.create(input, ImmutableList.of(), projects, fieldNames);
}
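Illustratively (the query is hypothetical, not taken from the source): for SELECT k, SUM(CAST(v AS DOUBLE)) FROM t GROUP BY k, the LogicalProject built here over the input scan would carry two expressions, the group-by reference for k and the CAST(v AS DOUBLE) argument of SUM, each named via getTrait().resolveAlias(...); the LogicalAggregate constructed on top of it then refers to them as input fields 0 and 1.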