Search in sources :

Example 1 with PathlingFunctions

Uses of au.csiro.pathling.sql.PathlingFunctions in the pathling project by aehrc.

The following example shows the buildQuery method of the AggregateExecutor class.

/**
 * Builds the dataset for an aggregate request. The result is returned uncollected, so the
 * caller decides when (and whether) to materialise it.
 *
 * @param query an {@link AggregateRequest} describing the aggregations, groupings and filters
 * @return a {@link ResultWithExpressions}, which includes the uncollected {@link Dataset}
 */
@SuppressWarnings("WeakerAccess")
@Nonnull
public ResultWithExpressions buildQuery(@Nonnull final AggregateRequest query) {
    log.info("Executing request: {}", query);

    // Set up a parser rooted at the subject resource, then parse every filter and grouping
    // expression within the request.
    final ResourcePath subjectResource = ResourcePath.build(getFhirContext(), getDatabase(),
        query.getSubjectResource(), query.getSubjectResource().toCode(), true);
    final Column idColumn = subjectResource.getIdColumn();
    final ParserContext filterGroupingContext = buildParserContext(subjectResource,
        Collections.singletonList(idColumn));
    final Parser filterGroupingParser = new Parser(filterGroupingContext);
    final List<FhirPath> filterPaths = parseFilters(filterGroupingParser, query.getFilters());
    final List<FhirPathAndContext> parsedGroupings = parseMaterializableExpressions(
        filterGroupingContext, query.getGroupings(), "Grouping");
    final List<FhirPath> groupingPaths = parsedGroupings.stream()
        .map(FhirPathAndContext::getFhirPath)
        .collect(Collectors.toList());

    // Combine the filter and grouping expressions into a single dataset, then apply the
    // filters to it.
    Dataset<Row> combined = joinExpressionsAndFilters(subjectResource, groupingPaths,
        filterPaths, idColumn);
    combined = applyFilters(combined, filterPaths);

    // Strip synthetic fields (such as _fid) out of struct-valued grouping columns before
    // grouping on them.
    final Column[] prunedGroupingValues = groupingPaths.stream()
        .map(FhirPath::getValueColumn)
        .map(PathlingFunctions::pruneSyntheticFields)
        .toArray(Column[]::new);
    final DatasetWithColumnMap normalizedGroupings = createColumns(combined,
        prunedGroupingValues);
    combined = normalizedGroupings.getDataset();
    final List<Column> groupingColumns = new ArrayList<>(
        normalizedGroupings.getColumnMap().values());

    // The context used to parse the aggregations mirrors the one used for the groupings and
    // filters, except that it is based upon the dataset produced above rather than the raw
    // resource. This lets aggregations performed during the parse group on these columns
    // instead of the identity of each resource.
    final ResourcePath aggregationInput = subjectResource.copy(subjectResource.getExpression(),
        combined, idColumn, subjectResource.getEidColumn(), subjectResource.getValueColumn(),
        subjectResource.isSingular(), Optional.empty());
    final Parser aggregationParser = new Parser(
        buildParserContext(aggregationInput, groupingColumns));

    // Parse the aggregations. Aggregation operations performed during this parse erase the
    // columns built up previously within the dataset, which is why the grouping columns were
    // captured from the normalised dataset above.
    final List<FhirPath> aggregationPaths = parseAggregations(aggregationParser,
        query.getAggregations());

    // Join the aggregation results together, using equality of the grouping column values as
    // the join condition.
    final Dataset<Row> joinedAggregations = joinExpressionsByColumns(aggregationPaths,
        groupingColumns);
    final List<Column> aggregationColumns = aggregationPaths.stream()
        .map(FhirPath::getValueColumn)
        .collect(Collectors.toList());

    // The final selection is the grouping columns followed by the aggregation columns, with
    // duplicate rows removed.
    final List<Column> selection = new ArrayList<>(groupingColumns);
    selection.addAll(aggregationColumns);
    final Dataset<Row> result = joinedAggregations
        .select(selection.toArray(new Column[0]))
        .distinct();

    return new ResultWithExpressions(result, aggregationPaths, groupingPaths, filterPaths);
}
Also used : FhirPath(au.csiro.pathling.fhirpath.FhirPath) PathlingFunctions(au.csiro.pathling.sql.PathlingFunctions) Parser(au.csiro.pathling.fhirpath.parser.Parser) ResourcePath(au.csiro.pathling.fhirpath.ResourcePath) DatasetWithColumnMap(au.csiro.pathling.QueryHelpers.DatasetWithColumnMap) Column(org.apache.spark.sql.Column) Row(org.apache.spark.sql.Row) ParserContext(au.csiro.pathling.fhirpath.parser.ParserContext) Nonnull(javax.annotation.Nonnull)

Aggregations

DatasetWithColumnMap (au.csiro.pathling.QueryHelpers.DatasetWithColumnMap)1 FhirPath (au.csiro.pathling.fhirpath.FhirPath)1 ResourcePath (au.csiro.pathling.fhirpath.ResourcePath)1 Parser (au.csiro.pathling.fhirpath.parser.Parser)1 ParserContext (au.csiro.pathling.fhirpath.parser.ParserContext)1 PathlingFunctions (au.csiro.pathling.sql.PathlingFunctions)1 Nonnull (javax.annotation.Nonnull)1 Column (org.apache.spark.sql.Column)1 Row (org.apache.spark.sql.Row)1