Usage of org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction in project boostkit-bigdata by kunpengcompute: class DataIoAdapter, method createAggregationInfo.
/**
 * Assembles an {@link AggregationInfo} from the pushed-down aggregate functions and
 * grouping expressions.
 *
 * @param aggregateFunctions aggregate functions to fold into the aggregation map
 * @param namedExpressions   grouping expressions converted into grouping keys
 * @return an {@link Optional} holding the aggregation info, or {@link Optional#empty()}
 *         when both input lists are empty
 */
private Optional<AggregationInfo> createAggregationInfo(List<AggregateFunction> aggregateFunctions, List<NamedExpression> namedExpressions) {
List<RowExpression> groupingKeys = new ArrayList<>();
// LinkedHashMap keeps aggregate functions in their original encounter order.
Map<String, AggregationInfo.AggregateFunction> aggregationMap = new LinkedHashMap<>();
for (NamedExpression groupExpr : namedExpressions) {
groupingKeys.add(extractNamedExpression((Expression) groupExpr));
}
for (AggregateFunction aggFunc : aggregateFunctions) {
extractAggregateFunction(aggFunc, aggregationMap);
}
// Empty result only when there was neither a grouping key nor an aggregate function.
if (aggregateFunctions.isEmpty() && namedExpressions.isEmpty()) {
return Optional.empty();
}
return Optional.of(new AggregationInfo(aggregationMap, groupingKeys));
}
Usage of org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction in project boostkit-bigdata by kunpengcompute: class DataIoAdapter, method extractAggAndGroupExpression.
/**
 * Extracts aggregate and grouping expressions from the pushed-down execution info
 * and converts them into an {@link AggregationInfo}.
 *
 * NOTE(review): when {@code aggExecutionList} has several entries, each iteration
 * overwrites the previous result, so only the LAST entry's aggregation info is
 * returned — this preserves the original behavior; confirm it is intentional.
 *
 * @param aggExecutionList pushed-down aggregate execution descriptors
 * @return the aggregation info built from the last entry, or empty if the list is empty
 */
private Optional<AggregationInfo> extractAggAndGroupExpression(List<AggExeInfo> aggExecutionList) {
Optional<AggregationInfo> result = Optional.empty();
for (AggExeInfo info : aggExecutionList) {
result = createAggregationInfo(
JavaConverters.seqAsJavaList(info.aggregateExpressions()),
JavaConverters.seqAsJavaList(info.groupingExpressions()));
}
return result;
}
Usage of org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction in project boostkit-bigdata by kunpengcompute: class NdpUtils, method getColumnOffsetByAggExeInfo.
/**
 * Computes the offset between the column id parsed out of the aggregate execution
 * info and that column's ordinal position in {@code dataSchema}.
 *
 * Only the FIRST child expression of the FIRST aggregate function is inspected
 * (the inner loops break immediately); if grouping expressions exist, the first
 * one overrides the values derived from the aggregate function.
 *
 * Fix: the column name extracted from the expression string is now wrapped in
 * {@link Pattern#quote(String)} before being embedded in a regex. Previously a
 * column name containing regex metacharacters (e.g. '$', '.', '+') would corrupt
 * the pattern or throw {@code PatternSyntaxException}.
 *
 * @param dataSchema the table schema used to resolve the column's position
 * @param aggExeInfo pushed-down aggregate execution descriptors (may be null/empty)
 * @return the absolute difference between the parsed column id and the schema position
 */
public static int getColumnOffsetByAggExeInfo(StructType dataSchema, Seq<AggExeInfo> aggExeInfo) {
String columnName = "";
int columnTempId = 0;
if (aggExeInfo != null && aggExeInfo.size() > 0) {
List<AggExeInfo> aggExecutionList = JavaConverters.seqAsJavaList(aggExeInfo);
for (AggExeInfo aggExeInfoTemp : aggExecutionList) {
List<AggregateFunction> aggregateExpressions = JavaConverters.seqAsJavaList(aggExeInfoTemp.aggregateExpressions());
for (AggregateFunction aggregateFunction : aggregateExpressions) {
List<Expression> expressions = JavaConverters.seqAsJavaList(aggregateFunction.children());
for (Expression expression : expressions) {
// Expression strings look like "sum(col#12)": take the text before '#'
// and strip the function name's '(' to recover the bare column name.
columnName = expression.toString().split("#")[0].replaceAll("\\(", "");
// Quote the literal name so regex metacharacters in column names
// cannot break the pattern.
Pattern pattern = Pattern.compile(Pattern.quote(columnName) + "#(\\d+)");
Matcher matcher = pattern.matcher(expression.toString());
if (matcher.find()) {
columnTempId = Integer.parseInt(matcher.group(1));
break;
}
}
break; // only the first aggregate function is considered
}
// A grouping expression, when present, overrides the aggregate-derived values;
// only the first grouping expression is used.
List<NamedExpression> namedExpressions = JavaConverters.seqAsJavaList(aggExeInfoTemp.groupingExpressions());
for (NamedExpression namedExpression : namedExpressions) {
columnName = namedExpression.toString().split("#")[0];
columnTempId = NdpUtils.getColumnId(namedExpression.toString());
break;
}
}
}
// Map each schema field name to its ordinal position.
Map<String, Integer> columnMap = new HashMap<>();
scala.collection.Iterator<StructField> allTableSchemas = dataSchema.iterator();
int dataSchemaColumnNum = 0;
while (allTableSchemas.hasNext()) {
StructField structField = allTableSchemas.next();
columnMap.put(structField.name(), dataSchemaColumnNum++);
}
// If the column is missing from the schema, the map size is used as its position.
// NOTE(review): Math.abs hides the sign of the offset; presumably offsets are
// expected to be non-negative — confirm against callers.
int columnOffset = columnTempId - columnMap.getOrDefault(columnName, columnMap.size());
return Math.abs(columnOffset);
}
Aggregations