Search in sources :

Example 1 with AggregateFunction

Use of org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction in project boostkit-bigdata by kunpengcompute.

From class DataIoAdapter, method createAggregationInfo:

/**
 * Builds an {@link AggregationInfo} from the given aggregate functions and grouping expressions.
 *
 * @param aggregateFunctions aggregate functions to extract into the aggregation map
 * @param namedExpressions grouping expressions to convert into grouping-key row expressions
 * @return an {@link Optional} holding the aggregation info, or {@link Optional#empty()} when
 *         both input lists are empty (i.e. there is no aggregation to describe)
 */
private Optional<AggregationInfo> createAggregationInfo(List<AggregateFunction> aggregateFunctions, List<NamedExpression> namedExpressions) {
    // Nothing to aggregate and nothing to group by: no AggregationInfo is needed.
    // (Replaces the original per-iteration "isEmpty = false" flag with one up-front check.)
    if (aggregateFunctions.isEmpty() && namedExpressions.isEmpty()) {
        return Optional.empty();
    }
    List<RowExpression> groupingKeys = new ArrayList<>(namedExpressions.size());
    // LinkedHashMap keeps aggregate functions in extraction order.
    Map<String, AggregationInfo.AggregateFunction> aggregationMap = new LinkedHashMap<>();
    for (NamedExpression namedExpression : namedExpressions) {
        groupingKeys.add(extractNamedExpression((Expression) namedExpression));
    }
    for (AggregateFunction aggregateFunction : aggregateFunctions) {
        extractAggregateFunction(aggregateFunction, aggregationMap);
    }
    return Optional.of(new AggregationInfo(aggregationMap, groupingKeys));
}
Also used : NamedExpression(org.apache.spark.sql.catalyst.expressions.NamedExpression) AggregateFunction(org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction) ArrayList(java.util.ArrayList) RowExpression(io.prestosql.spi.relation.RowExpression) AggregationInfo(com.huawei.boostkit.omnidata.model.AggregationInfo) LinkedHashMap(java.util.LinkedHashMap)

Example 2 with AggregateFunction

Use of org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction in project boostkit-bigdata by kunpengcompute.

From class DataIoAdapter, method extractAggAndGroupExpression:

/**
 * Derives aggregation metadata from a list of aggregation execution descriptors.
 *
 * @param aggExecutionList descriptors carrying aggregate and grouping expressions
 * @return the aggregation info built from the last descriptor that produced one,
 *         or {@link Optional#empty()} when the list is empty
 */
private Optional<AggregationInfo> extractAggAndGroupExpression(List<AggExeInfo> aggExecutionList) {
    Optional<AggregationInfo> result = Optional.empty();
    for (AggExeInfo info : aggExecutionList) {
        // Bridge the Scala sequences to Java lists before delegating.
        result = createAggregationInfo(
                JavaConverters.seqAsJavaList(info.aggregateExpressions()),
                JavaConverters.seqAsJavaList(info.groupingExpressions()));
    }
    // NOTE(review): with multiple entries only the last result survives —
    // presumably callers pass at most one AggExeInfo; confirm.
    return result;
}
Also used : AggExeInfo(org.apache.spark.sql.execution.ndp.AggExeInfo) NamedExpression(org.apache.spark.sql.catalyst.expressions.NamedExpression) AggregateFunction(org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction) AggregationInfo(com.huawei.boostkit.omnidata.model.AggregationInfo)

Example 3 with AggregateFunction

Use of org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction in project boostkit-bigdata by kunpengcompute.

From class NdpUtils, method getColumnOffsetByAggExeInfo:

/**
 * Computes the absolute offset between a column's expression id and its position in the
 * given data schema, using the first column referenced by the aggregation execution info.
 *
 * @param dataSchema the table schema whose field order defines column positions
 * @param aggExeInfo aggregation descriptors; may be null or empty, in which case the
 *                   offset falls back to {@code 0 - columnMap.size()} (absolute value)
 * @return the non-negative distance between the parsed expression id and the schema index
 */
public static int getColumnOffsetByAggExeInfo(StructType dataSchema, Seq<AggExeInfo> aggExeInfo) {
    String columnName = "";
    int columnTempId = 0;
    if (aggExeInfo != null && aggExeInfo.size() > 0) {
        List<AggExeInfo> aggExecutionList = JavaConverters.seqAsJavaList(aggExeInfo);
        for (AggExeInfo aggExeInfoTemp : aggExecutionList) {
            List<AggregateFunction> aggregateExpressions = JavaConverters.seqAsJavaList(aggExeInfoTemp.aggregateExpressions());
            for (AggregateFunction aggregateFunction : aggregateExpressions) {
                // Only the first aggregate function is inspected (note the break below).
                List<Expression> expressions = JavaConverters.seqAsJavaList(aggregateFunction.children());
                for (Expression expression : expressions) {
                    // Derive the column name from the expression's string form, e.g.
                    // "sum(price#12)" -> "price" (strip the "#id" suffix and any '(' prefix).
                    columnName = expression.toString().split("#")[0].replaceAll("\\(", "");
                    // NOTE(review): columnName is interpolated into the regex unquoted —
                    // a name containing regex metacharacters would misbehave; consider
                    // Pattern.quote(columnName). Confirm column names are always plain.
                    Pattern pattern = Pattern.compile(columnName + "#(\\d+)");
                    Matcher matcher = pattern.matcher(expression.toString());
                    if (matcher.find()) {
                        // The numeric suffix after '#' is the expression id of the column.
                        columnTempId = Integer.parseInt(matcher.group(1));
                        break;
                    }
                }
                break;
            }
            // Grouping expressions override the aggregate-derived name/id; again only
            // the first grouping expression is used (break after one iteration).
            List<NamedExpression> namedExpressions = JavaConverters.seqAsJavaList(aggExeInfoTemp.groupingExpressions());
            for (NamedExpression namedExpression : namedExpressions) {
                columnName = namedExpression.toString().split("#")[0];
                columnTempId = NdpUtils.getColumnId(namedExpression.toString());
                break;
            }
        }
    }
    // Map each schema field name to its ordinal position.
    Map<String, Integer> columnMap = new HashMap<>();
    scala.collection.Iterator<StructField> allTableSchemas = dataSchema.iterator();
    int dataSchemaColumnNum = 0;
    while (allTableSchemas.hasNext()) {
        StructField structField = allTableSchemas.next();
        columnMap.put(structField.name(), dataSchemaColumnNum++);
    }
    // Unknown columns default to columnMap.size(), i.e. one past the last schema index.
    int columnOffset = columnTempId - columnMap.getOrDefault(columnName, columnMap.size());
    // NOTE(review): Math.abs assumes the id/index difference is symmetric in meaning;
    // verify callers expect a magnitude rather than a signed offset.
    return Math.abs(columnOffset);
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) HashMap(java.util.HashMap) AggExeInfo(org.apache.spark.sql.execution.ndp.AggExeInfo) StructField(org.apache.spark.sql.types.StructField) ConstantExpression(io.prestosql.spi.relation.ConstantExpression) NamedExpression(org.apache.spark.sql.catalyst.expressions.NamedExpression) Expression(org.apache.spark.sql.catalyst.expressions.Expression) NamedExpression(org.apache.spark.sql.catalyst.expressions.NamedExpression) AggregateFunction(org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction)

Aggregations

NamedExpression (org.apache.spark.sql.catalyst.expressions.NamedExpression)3 AggregateFunction (org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction)3 AggregationInfo (com.huawei.boostkit.omnidata.model.AggregationInfo)2 AggExeInfo (org.apache.spark.sql.execution.ndp.AggExeInfo)2 ConstantExpression (io.prestosql.spi.relation.ConstantExpression)1 RowExpression (io.prestosql.spi.relation.RowExpression)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 Matcher (java.util.regex.Matcher)1 Pattern (java.util.regex.Pattern)1 Expression (org.apache.spark.sql.catalyst.expressions.Expression)1 StructField (org.apache.spark.sql.types.StructField)1