Example 1 with DatasetWithColumnMap

Use of au.csiro.pathling.QueryHelpers.DatasetWithColumnMap in project pathling by aehrc.

From the class PathTraversalOperator, method invoke:

/**
 * Invokes this operator with the specified inputs.
 *
 * @param input A {@link PathTraversalInput} object
 * @return A {@link FhirPath} object representing the resulting expression
 */
@Nonnull
public ElementPath invoke(@Nonnull final PathTraversalInput input) {
    checkUserInput(input.getLeft() instanceof NonLiteralPath,
        "Path traversal operator cannot be invoked on a literal value: "
            + input.getLeft().getExpression());
    final NonLiteralPath left = (NonLiteralPath) input.getLeft();
    final String right = input.getRight();
    // If the input expression is the same as the input context, the child will be the start of
    // the expression. This accounts for cases where the expression representing the input
    // context is omitted, e.g. "gender" instead of "Patient.gender".
    final String inputContextExpression = input.getContext().getInputContext().getExpression();
    final String expression = left.getExpression().equals(inputContextExpression)
        ? right
        : left.getExpression() + "." + right;
    final Optional<ElementDefinition> optionalChild = left.getChildElement(right);
    checkUserInput(optionalChild.isPresent(), "No such child: " + expression);
    final ElementDefinition childDefinition = optionalChild.get();
    final Dataset<Row> leftDataset = left.getDataset();
    final Column field;
    if (ExtensionSupport.EXTENSION_ELEMENT_NAME().equals(right)) {
        // Look up the extensions by _fid in the extension container.
        field = left.getExtensionContainerColumn()
            .apply(getValueField(left, ExtensionSupport.FID_FIELD_NAME()));
    } else {
        field = getValueField(left, right);
    }
    // If the element has a max cardinality of more than one, it will need to be "exploded" out into
    // multiple rows.
    final boolean maxCardinalityOfOne = childDefinition.getMaxCardinality() == 1;
    final boolean resultSingular = left.isSingular() && maxCardinalityOfOne;
    final Column valueColumn;
    final Optional<Column> eidColumnCandidate;
    final Dataset<Row> resultDataset;
    if (maxCardinalityOfOne) {
        valueColumn = field;
        eidColumnCandidate = left.getEidColumn();
        resultDataset = leftDataset;
    } else {
        final MutablePair<Column, Column> valueAndEidColumns = new MutablePair<>();
        final Dataset<Row> explodedDataset = left.explodeArray(leftDataset, field,
            valueAndEidColumns);
        final DatasetWithColumnMap datasetWithColumnMap = createColumns(explodedDataset,
            valueAndEidColumns.getLeft(), valueAndEidColumns.getRight());
        resultDataset = datasetWithColumnMap.getDataset();
        valueColumn = datasetWithColumnMap.getColumn(valueAndEidColumns.getLeft());
        eidColumnCandidate = Optional.of(
            datasetWithColumnMap.getColumn(valueAndEidColumns.getRight()));
    }
    final Optional<Column> eidColumn = resultSingular ? Optional.empty() : eidColumnCandidate;
    // If there is an element ID column, we need to add it to the parser context so that it can
    // be used within joins in certain situations, e.g. extract.
    eidColumn.ifPresent(c -> input.getContext().getNodeIdColumns().putIfAbsent(expression, c));
    return ElementPath.build(expression, resultDataset, left.getIdColumn(), eidColumn,
        valueColumn, resultSingular, left.getCurrentResource(), left.getThisColumn(),
        childDefinition);
}
Also used: MutablePair(org.apache.commons.lang3.tuple.MutablePair) DatasetWithColumnMap(au.csiro.pathling.QueryHelpers.DatasetWithColumnMap) Column(org.apache.spark.sql.Column) ElementDefinition(au.csiro.pathling.fhirpath.element.ElementDefinition) Row(org.apache.spark.sql.Row) NonLiteralPath(au.csiro.pathling.fhirpath.NonLiteralPath) Nonnull(javax.annotation.Nonnull)
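
A minimal usage sketch (not taken from the project sources): resolving the "gender" child of a Patient resource via the operator above, i.e. the FHIRPath expression "Patient.gender". The (ParserContext, FhirPath, String) constructor shape for PathTraversalInput is an assumption inferred from the getLeft(), getRight() and getContext() calls in invoke().

@Nonnull
static ElementPath traverseToGender(@Nonnull final ParserContext context,
    @Nonnull final ResourcePath patient) {
    // Hypothetical input wiring; the constructor signature is assumed, not
    // confirmed by the source above.
    final PathTraversalInput input = new PathTraversalInput(context, patient, "gender");
    // Returns an ElementPath whose dataset carries a value column for gender.
    return new PathTraversalOperator().invoke(input);
}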

Example 2 with DatasetWithColumnMap

Use of au.csiro.pathling.QueryHelpers.DatasetWithColumnMap in project pathling by aehrc.

From the class ElementPath, method getInstance:

@Nonnull
private static ElementPath getInstance(@Nonnull final String expression,
    @Nonnull final Dataset<Row> dataset, @Nonnull final Column idColumn,
    @Nonnull final Optional<Column> eidColumn, @Nonnull final Column valueColumn,
    final boolean singular, @Nonnull final Optional<ResourcePath> currentResource,
    @Nonnull final Optional<Column> thisColumn, @Nonnull final FHIRDefinedType fhirType) {
    // Look up the class that represents an element with the specified FHIR type.
    final Class<? extends ElementPath> elementPathClass = ElementDefinition
        .elementClassForType(fhirType).orElse(ElementPath.class);
    final DatasetWithColumnMap datasetWithColumns = eidColumn
        .map(eidCol -> createColumns(dataset, eidCol, valueColumn))
        .orElseGet(() -> createColumns(dataset, valueColumn));
    try {
        // Call its constructor and return.
        final Constructor<? extends ElementPath> constructor =
            elementPathClass.getDeclaredConstructor(String.class, Dataset.class,
                Column.class, Optional.class, Column.class, boolean.class,
                Optional.class, Optional.class, FHIRDefinedType.class);
        return constructor.newInstance(expression, datasetWithColumns.getDataset(),
            idColumn, eidColumn.map(datasetWithColumns::getColumn),
            datasetWithColumns.getColumn(valueColumn), singular, currentResource,
            thisColumn, fhirType);
    } catch (final NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException e) {
        throw new RuntimeException("Problem building an ElementPath class", e);
    }
}
Also used: Getter(lombok.Getter) Dataset(org.apache.spark.sql.Dataset) NonLiteralPath(au.csiro.pathling.fhirpath.NonLiteralPath) FHIRDefinedType(org.hl7.fhir.r4.model.Enumerations.FHIRDefinedType) Column(org.apache.spark.sql.Column) QueryHelpers.createColumns(au.csiro.pathling.QueryHelpers.createColumns) Row(org.apache.spark.sql.Row) Constructor(java.lang.reflect.Constructor) ResourcePath(au.csiro.pathling.fhirpath.ResourcePath) InvocationTargetException(java.lang.reflect.InvocationTargetException) AccessLevel(lombok.AccessLevel) DatasetWithColumnMap(au.csiro.pathling.QueryHelpers.DatasetWithColumnMap) FhirPath(au.csiro.pathling.fhirpath.FhirPath) Optional(java.util.Optional) InvalidUserInputError(au.csiro.pathling.errors.InvalidUserInputError) Nonnull(javax.annotation.Nonnull)
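
The reflective construction pattern in getInstance() can be shown with plain JDK types. This self-contained sketch resolves a declared constructor by its parameter types and instantiates through it, the same technique the code above applies to elementPathClass:

import java.lang.reflect.Constructor;

public class ReflectiveConstruction {

    public static void main(final String[] args) throws Exception {
        // Resolve a constructor by parameter types, mirroring
        // elementPathClass.getDeclaredConstructor(...) above.
        final Class<? extends CharSequence> clazz = String.class;
        final Constructor<? extends CharSequence> constructor =
            clazz.getDeclaredConstructor(char[].class);
        // The cast to Object stops varargs from spreading the array argument.
        final CharSequence value = constructor.newInstance((Object) new char[]{'h', 'i'});
        System.out.println(value);  // prints "hi"
    }
}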

Example 3 with DatasetWithColumnMap

Use of au.csiro.pathling.QueryHelpers.DatasetWithColumnMap in project pathling by aehrc.

From the class AggregateExecutor, method buildQuery:

/**
 * @param query an {@link AggregateRequest}
 * @return a {@link ResultWithExpressions}, which includes the uncollected {@link Dataset}
 */
@SuppressWarnings("WeakerAccess")
@Nonnull
public ResultWithExpressions buildQuery(@Nonnull final AggregateRequest query) {
    log.info("Executing request: {}", query);
    // Build a new expression parser, and parse all of the filter and grouping expressions within
    // the query.
    final ResourcePath inputContext = ResourcePath.build(getFhirContext(), getDatabase(),
        query.getSubjectResource(), query.getSubjectResource().toCode(), true);
    final ParserContext groupingAndFilterContext = buildParserContext(inputContext,
        Collections.singletonList(inputContext.getIdColumn()));
    final Parser parser = new Parser(groupingAndFilterContext);
    final List<FhirPath> filters = parseFilters(parser, query.getFilters());
    final List<FhirPathAndContext> groupingParseResult = parseMaterializableExpressions(
        groupingAndFilterContext, query.getGroupings(), "Grouping");
    final List<FhirPath> groupings = groupingParseResult.stream()
        .map(FhirPathAndContext::getFhirPath).collect(Collectors.toList());
    // Join all filter and grouping expressions together.
    final Column idColumn = inputContext.getIdColumn();
    Dataset<Row> groupingsAndFilters = joinExpressionsAndFilters(inputContext, groupings, filters, idColumn);
    // Apply filters.
    groupingsAndFilters = applyFilters(groupingsAndFilters, filters);
    // Remove synthetic fields from struct values (such as _fid) before grouping.
    final DatasetWithColumnMap datasetWithNormalizedGroupings = createColumns(
        groupingsAndFilters, groupings.stream()
            .map(FhirPath::getValueColumn)
            .map(PathlingFunctions::pruneSyntheticFields)
            .toArray(Column[]::new));
    groupingsAndFilters = datasetWithNormalizedGroupings.getDataset();
    final List<Column> groupingColumns = new ArrayList<>(datasetWithNormalizedGroupings.getColumnMap().values());
    // The input context will be identical to that used for the groupings and filters, except that
    // it will use the dataset that resulted from the parsing of the groupings and filters,
    // instead of just the raw resource. This is so that any aggregations that are performed
    // during the parse can use these columns for grouping, rather than the identity of each
    // resource.
    final ResourcePath aggregationContext = inputContext.copy(inputContext.getExpression(),
        groupingsAndFilters, idColumn, inputContext.getEidColumn(),
        inputContext.getValueColumn(), inputContext.isSingular(), Optional.empty());
    final ParserContext aggregationParserContext = buildParserContext(aggregationContext, groupingColumns);
    final Parser aggregationParser = new Parser(aggregationParserContext);
    // Parse the aggregations, and grab the updated grouping columns. When aggregations are
    // performed during an aggregation parse, the grouping columns need to be updated, as any
    // aggregation operation erases the previous columns that were built up within the dataset.
    final List<FhirPath> aggregations = parseAggregations(aggregationParser, query.getAggregations());
    // Join the aggregations together, using equality of the grouping column values as the join
    // condition.
    final List<Column> aggregationColumns = aggregations.stream()
        .map(FhirPath::getValueColumn).collect(Collectors.toList());
    final Dataset<Row> joinedAggregations = joinExpressionsByColumns(aggregations, groupingColumns);
    // The final column selection will be the grouping columns, followed by the aggregation
    // columns.
    final List<Column> finalSelection = new ArrayList<>(groupingColumns);
    finalSelection.addAll(aggregationColumns);
    final Dataset<Row> finalDataset = joinedAggregations
        .select(finalSelection.toArray(new Column[0]))
        .distinct();
    return new ResultWithExpressions(finalDataset, aggregations, groupings, filters);
}
Also used: FhirPath(au.csiro.pathling.fhirpath.FhirPath) PathlingFunctions(au.csiro.pathling.sql.PathlingFunctions) Parser(au.csiro.pathling.fhirpath.parser.Parser) ResourcePath(au.csiro.pathling.fhirpath.ResourcePath) DatasetWithColumnMap(au.csiro.pathling.QueryHelpers.DatasetWithColumnMap) Column(org.apache.spark.sql.Column) Row(org.apache.spark.sql.Row) ParserContext(au.csiro.pathling.fhirpath.parser.ParserContext) Nonnull(javax.annotation.Nonnull)
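
The overall shape of the final query can be sketched with plain Spark, independent of Pathling: group by the normalized grouping columns, aggregate, then select groupings followed by aggregations. The dataset and column names below are illustrative only.

import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.count;

import java.util.Arrays;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.StructType;

public class AggregationShape {

    public static void main(final String[] args) {
        final SparkSession spark = SparkSession.builder()
            .master("local[*]").appName("aggregation-shape").getOrCreate();
        // Stand-in for the groupings-and-filters dataset built in buildQuery.
        final Dataset<Row> patients = spark.createDataFrame(Arrays.asList(
                RowFactory.create("patient-1", "female"),
                RowFactory.create("patient-2", "male"),
                RowFactory.create("patient-3", "female")),
            new StructType().add("id", "string").add("gender", "string"));
        // Grouping column first, aggregation column second, mirroring the
        // final selection order in buildQuery.
        patients.groupBy(col("gender"))
            .agg(count(col("id")).alias("count"))
            .select(col("gender"), col("count"))
            .show();
        spark.stop();
    }
}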

Example 4 with DatasetWithColumnMap

Use of au.csiro.pathling.QueryHelpers.DatasetWithColumnMap in project pathling by aehrc.

From the class ResourcePath, method build:

/**
 * Build a new ResourcePath using the supplied {@link FhirContext} and {@link Database}.
 *
 * @param fhirContext the {@link FhirContext} to use for sourcing the resource definition
 * @param database the {@link Database} to use for retrieving the Dataset
 * @param resourceType the type of the resource
 * @param expression the expression to use in the resulting path
 * @param singular whether the resulting path should be flagged as a single item collection
 * @param skipAliasing set to true to skip column aliasing
 * @return A shiny new ResourcePath
 */
@Nonnull
public static ResourcePath build(@Nonnull final FhirContext fhirContext,
    @Nonnull final Database database, @Nonnull final ResourceType resourceType,
    @Nonnull final String expression, final boolean singular,
    final boolean skipAliasing) {
    // Get the resource definition from HAPI.
    final String resourceCode = resourceType.toCode();
    final RuntimeResourceDefinition hapiDefinition = fhirContext.getResourceDefinition(resourceCode);
    final ResourceDefinition definition = new ResourceDefinition(resourceType, hapiDefinition);
    // Retrieve the dataset for the resource type using the supplied resource reader.
    final Dataset<Row> dataset = database.read(resourceType);
    final Column idColumn = col("id");
    final Column finalIdColumn;
    final Dataset<Row> finalDataset;
    final Map<String, Column> elementsToColumns;
    if (skipAliasing) {
        // If aliasing is disabled, the dataset will contain columns with the original element names.
        // This is used for contexts where we need the original column names for encoding (e.g.
        // search).
        finalDataset = dataset;
        finalIdColumn = idColumn;
        elementsToColumns = Stream.of(dataset.columns())
            .collect(Collectors.toMap(Function.identity(), functions::col, (a, b) -> null));
    } else {
        // If aliasing is enabled, all columns in the dataset will be aliased, and the original
        // columns will be dropped. This is to avoid column name clashes when doing joins.
        final DatasetWithColumnMap datasetWithColumnMap = aliasAllColumns(dataset);
        finalDataset = datasetWithColumnMap.getDataset();
        final Map<Column, Column> columnMap = datasetWithColumnMap.getColumnMap();
        elementsToColumns = columnMap.keySet().stream()
            .collect(Collectors.toMap(Column::toString, columnMap::get, (a, b) -> null));
        finalIdColumn = elementsToColumns.get(idColumn.toString());
    }
    // We use the ID column as the value column for a ResourcePath.
    return new ResourcePath(expression, finalDataset, finalIdColumn, Optional.empty(),
        finalIdColumn, singular, Optional.empty(), definition, elementsToColumns);
}
Also used: org.apache.spark.sql.functions(org.apache.spark.sql.functions) DatasetWithColumnMap(au.csiro.pathling.QueryHelpers.DatasetWithColumnMap) RuntimeResourceDefinition(ca.uhn.fhir.context.RuntimeResourceDefinition) Column(org.apache.spark.sql.Column) Row(org.apache.spark.sql.Row) Nonnull(javax.annotation.Nonnull)
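
A brief, hypothetical usage sketch of the method above: building an aliased ResourcePath for the Patient resource type. The Database instance is assumed to be configured elsewhere; FhirContext.forR4() is the standard HAPI factory method.

@Nonnull
static ResourcePath buildPatientPath(@Nonnull final Database database) {
    final FhirContext fhirContext = FhirContext.forR4();
    // singular = false: the path represents a collection of Patient resources.
    // skipAliasing = false: alias every column to avoid join name clashes.
    return ResourcePath.build(fhirContext, database, ResourceType.PATIENT,
        "Patient", false, false);
}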

Aggregations

DatasetWithColumnMap (au.csiro.pathling.QueryHelpers.DatasetWithColumnMap): 4
Nonnull (javax.annotation.Nonnull): 4
Column (org.apache.spark.sql.Column): 4
Row (org.apache.spark.sql.Row): 4
FhirPath (au.csiro.pathling.fhirpath.FhirPath): 2
NonLiteralPath (au.csiro.pathling.fhirpath.NonLiteralPath): 2
ResourcePath (au.csiro.pathling.fhirpath.ResourcePath): 2
QueryHelpers.createColumns (au.csiro.pathling.QueryHelpers.createColumns): 1
InvalidUserInputError (au.csiro.pathling.errors.InvalidUserInputError): 1
ElementDefinition (au.csiro.pathling.fhirpath.element.ElementDefinition): 1
Parser (au.csiro.pathling.fhirpath.parser.Parser): 1
ParserContext (au.csiro.pathling.fhirpath.parser.ParserContext): 1
PathlingFunctions (au.csiro.pathling.sql.PathlingFunctions): 1
RuntimeResourceDefinition (ca.uhn.fhir.context.RuntimeResourceDefinition): 1
Constructor (java.lang.reflect.Constructor): 1
InvocationTargetException (java.lang.reflect.InvocationTargetException): 1
Optional (java.util.Optional): 1
AccessLevel (lombok.AccessLevel): 1
Getter (lombok.Getter): 1
MutablePair (org.apache.commons.lang3.tuple.MutablePair): 1