Use of au.csiro.pathling.QueryHelpers.DatasetWithColumnMap in project pathling by aehrc.
The class PathTraversalOperator, method invoke.
/**
 * Invokes this operator with the specified inputs.
 *
 * @param input A {@link PathTraversalInput} object
 * @return A {@link FhirPath} object representing the resulting expression
 */
@Nonnull
public ElementPath invoke(@Nonnull final PathTraversalInput input) {
  checkUserInput(input.getLeft() instanceof NonLiteralPath,
      "Path traversal operator cannot be invoked on a literal value: "
          + input.getLeft().getExpression());
  final NonLiteralPath left = (NonLiteralPath) input.getLeft();
  final String right = input.getRight();

  // If the input expression is the same as the input context, the child will be the start of
  // the expression. This accounts for cases where the expression representing the input
  // context is omitted, e.g. "gender" instead of "Patient.gender".
  final String inputContextExpression = input.getContext().getInputContext().getExpression();
  final String expression = left.getExpression().equals(inputContextExpression)
                            ? right
                            : left.getExpression() + "." + right;

  final Optional<ElementDefinition> optionalChild = left.getChildElement(right);
  checkUserInput(optionalChild.isPresent(), "No such child: " + expression);
  final ElementDefinition childDefinition = optionalChild.get();

  final Dataset<Row> leftDataset = left.getDataset();
  final Column field;
  if (ExtensionSupport.EXTENSION_ELEMENT_NAME().equals(right)) {
    // Look up the extensions by _fid in the extension container.
    field = left.getExtensionContainerColumn()
        .apply(getValueField(left, ExtensionSupport.FID_FIELD_NAME()));
  } else {
    field = getValueField(left, right);
  }

  // If the element has a maximum cardinality of more than one, it will need to be "exploded"
  // out into multiple rows.
  final boolean maxCardinalityOfOne = childDefinition.getMaxCardinality() == 1;
  final boolean resultSingular = left.isSingular() && maxCardinalityOfOne;

  final Column valueColumn;
  final Optional<Column> eidColumnCandidate;
  final Dataset<Row> resultDataset;
  if (maxCardinalityOfOne) {
    valueColumn = field;
    eidColumnCandidate = left.getEidColumn();
    resultDataset = leftDataset;
  } else {
    final MutablePair<Column, Column> valueAndEidColumns = new MutablePair<>();
    final Dataset<Row> explodedDataset = left.explodeArray(leftDataset, field,
        valueAndEidColumns);
    final DatasetWithColumnMap datasetWithColumnMap = createColumns(explodedDataset,
        valueAndEidColumns.getLeft(), valueAndEidColumns.getRight());
    resultDataset = datasetWithColumnMap.getDataset();
    valueColumn = datasetWithColumnMap.getColumn(valueAndEidColumns.getLeft());
    eidColumnCandidate = Optional.of(
        datasetWithColumnMap.getColumn(valueAndEidColumns.getRight()));
  }
  final Optional<Column> eidColumn = resultSingular
                                     ? Optional.empty()
                                     : eidColumnCandidate;

  // If there is an element ID column, we need to add it to the parser context so that it can
  // be used within joins in certain situations, e.g. extract.
  eidColumn.ifPresent(c -> input.getContext().getNodeIdColumns().putIfAbsent(expression, c));

  return ElementPath.build(expression, resultDataset, left.getIdColumn(), eidColumn,
      valueColumn, resultSingular, left.getCurrentResource(), left.getThisColumn(),
      childDefinition);
}
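The multi-valued branch is easier to see with a toy example. The following is a minimal, self-contained Spark sketch of the same idea, not Pathling's implementation: the column names eid and value are illustrative only. A multi-valued element is unnested into one row per value, with a positional index standing in for the element ID that createColumns then aliases.

import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.posexplode_outer;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class ExplodeSketch {

  public static void main(final String[] args) {
    final SparkSession spark = SparkSession.builder()
        .master("local[*]")
        .appName("explode-sketch")
        .getOrCreate();

    // Toy stand-in for a resource dataset with a multi-valued "name" element.
    final Dataset<Row> patients = spark.sql(
        "SELECT 'patient-1' AS id, array('Smith', 'Smythe') AS name "
            + "UNION ALL SELECT 'patient-2', array('Jones')");

    // posexplode_outer unnests the array into one row per element, emitting the
    // positional index ("eid" here) alongside the value; null arrays are retained.
    final Dataset<Row> exploded = patients.select(col("id"),
        posexplode_outer(col("name")).as(new String[]{"eid", "value"}));

    exploded.show();
    // +---------+---+------+
    // |       id|eid| value|
    // +---------+---+------+
    // |patient-1|  0| Smith|
    // |patient-1|  1|Smythe|
    // |patient-2|  0| Jones|
    // +---------+---+------+
  }
}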
Use of au.csiro.pathling.QueryHelpers.DatasetWithColumnMap in project pathling by aehrc.
The class ElementPath, method getInstance.
@Nonnull
private static ElementPath getInstance(@Nonnull final String expression,
    @Nonnull final Dataset<Row> dataset, @Nonnull final Column idColumn,
    @Nonnull final Optional<Column> eidColumn, @Nonnull final Column valueColumn,
    final boolean singular, @Nonnull final Optional<ResourcePath> currentResource,
    @Nonnull final Optional<Column> thisColumn, @Nonnull final FHIRDefinedType fhirType) {
  // Look up the class that represents an element with the specified FHIR type.
  final Class<? extends ElementPath> elementPathClass = ElementDefinition
      .elementClassForType(fhirType)
      .orElse(ElementPath.class);
  final DatasetWithColumnMap datasetWithColumns = eidColumn
      .map(eidCol -> createColumns(dataset, eidCol, valueColumn))
      .orElseGet(() -> createColumns(dataset, valueColumn));

  try {
    // Call its constructor and return.
    final Constructor<? extends ElementPath> constructor = elementPathClass
        .getDeclaredConstructor(String.class, Dataset.class, Column.class, Optional.class,
            Column.class, boolean.class, Optional.class, Optional.class, FHIRDefinedType.class);
    return constructor.newInstance(expression, datasetWithColumns.getDataset(), idColumn,
        eidColumn.map(datasetWithColumns::getColumn), datasetWithColumns.getColumn(valueColumn),
        singular, currentResource, thisColumn, fhirType);
  } catch (final NoSuchMethodException | InstantiationException | IllegalAccessException
      | InvocationTargetException e) {
    throw new RuntimeException("Problem building an ElementPath class", e);
  }
}
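The reflective construction pattern used by getInstance can be reduced to a few lines. This sketch uses hypothetical BasePath and StringPath classes, not Pathling's real hierarchy, to show the lookup-by-parameter-list and invocation, with the same checked exceptions wrapped in a RuntimeException.

import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;

public class ReflectiveBuildSketch {

  // Hypothetical base class standing in for ElementPath.
  static class BasePath {
    final String expression;

    BasePath(final String expression) {
      this.expression = expression;
    }
  }

  // Hypothetical subclass standing in for a type-specific path.
  static class StringPath extends BasePath {
    StringPath(final String expression) {
      super(expression);
    }
  }

  static BasePath build(final Class<? extends BasePath> pathClass, final String expression) {
    try {
      // Look up the declared constructor by its exact parameter list, then invoke it.
      final Constructor<? extends BasePath> constructor =
          pathClass.getDeclaredConstructor(String.class);
      return constructor.newInstance(expression);
    } catch (final NoSuchMethodException | InstantiationException | IllegalAccessException
        | InvocationTargetException e) {
      throw new RuntimeException("Problem building a path class", e);
    }
  }

  public static void main(final String[] args) {
    final BasePath path = build(StringPath.class, "Patient.gender");
    System.out.println(path.getClass().getSimpleName() + ": " + path.expression);
    // Prints: StringPath: Patient.gender
  }
}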
Use of au.csiro.pathling.QueryHelpers.DatasetWithColumnMap in project pathling by aehrc.
The class AggregateExecutor, method buildQuery.
/**
 * @param query an {@link AggregateRequest}
 * @return a {@link ResultWithExpressions}, which includes the uncollected {@link Dataset}
 */
@SuppressWarnings("WeakerAccess")
@Nonnull
public ResultWithExpressions buildQuery(@Nonnull final AggregateRequest query) {
  log.info("Executing request: {}", query);

  // Build a new expression parser, and parse all of the filter and grouping expressions
  // within the query.
  final ResourcePath inputContext = ResourcePath.build(getFhirContext(), getDatabase(),
      query.getSubjectResource(), query.getSubjectResource().toCode(), true);
  final ParserContext groupingAndFilterContext = buildParserContext(inputContext,
      Collections.singletonList(inputContext.getIdColumn()));
  final Parser parser = new Parser(groupingAndFilterContext);
  final List<FhirPath> filters = parseFilters(parser, query.getFilters());
  final List<FhirPathAndContext> groupingParseResult = parseMaterializableExpressions(
      groupingAndFilterContext, query.getGroupings(), "Grouping");
  final List<FhirPath> groupings = groupingParseResult.stream()
      .map(FhirPathAndContext::getFhirPath)
      .collect(Collectors.toList());

  // Join all filter and grouping expressions together.
  final Column idColumn = inputContext.getIdColumn();
  Dataset<Row> groupingsAndFilters = joinExpressionsAndFilters(inputContext, groupings,
      filters, idColumn);
  // Apply filters.
  groupingsAndFilters = applyFilters(groupingsAndFilters, filters);

  // Remove synthetic fields from struct values (such as _fid) before grouping.
  final DatasetWithColumnMap datasetWithNormalizedGroupings = createColumns(
      groupingsAndFilters, groupings.stream()
          .map(FhirPath::getValueColumn)
          .map(PathlingFunctions::pruneSyntheticFields)
          .toArray(Column[]::new));
  groupingsAndFilters = datasetWithNormalizedGroupings.getDataset();
  final List<Column> groupingColumns = new ArrayList<>(
      datasetWithNormalizedGroupings.getColumnMap().values());

  // The input context will be identical to that used for the groupings and filters, except
  // that it will use the dataset that resulted from the parsing of the groupings and filters,
  // instead of just the raw resource. This is so that any aggregations that are performed
  // during the parse can use these columns for grouping, rather than the identity of each
  // resource.
  final ResourcePath aggregationContext = inputContext.copy(inputContext.getExpression(),
      groupingsAndFilters, idColumn, inputContext.getEidColumn(),
      inputContext.getValueColumn(), inputContext.isSingular(), Optional.empty());
  final ParserContext aggregationParserContext = buildParserContext(aggregationContext,
      groupingColumns);
  final Parser aggregationParser = new Parser(aggregationParserContext);

  // Parse the aggregations, and grab the updated grouping columns. When aggregations are
  // performed during an aggregation parse, the grouping columns need to be updated, as any
  // aggregation operation erases the previous columns that were built up within the dataset.
  final List<FhirPath> aggregations = parseAggregations(aggregationParser,
      query.getAggregations());

  // Join the aggregations together, using equality of the grouping column values as the join
  // condition.
  final List<Column> aggregationColumns = aggregations.stream()
      .map(FhirPath::getValueColumn)
      .collect(Collectors.toList());
  final Dataset<Row> joinedAggregations = joinExpressionsByColumns(aggregations,
      groupingColumns);

  // The final column selection will be the grouping columns, followed by the aggregation
  // columns.
  final List<Column> finalSelection = new ArrayList<>(groupingColumns);
  finalSelection.addAll(aggregationColumns);
  final Dataset<Row> finalDataset = joinedAggregations
      .select(finalSelection.toArray(new Column[0]))
      .distinct();
  return new ResultWithExpressions(finalDataset, aggregations, groupings, filters);
}
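Stripped of the parser machinery, the query that buildQuery assembles has a conventional filter / group-by / aggregate / select shape. A minimal sketch in plain Spark, using toy data and hypothetical column names rather than anything from Pathling:

import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.count;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class AggregateSketch {

  public static void main(final String[] args) {
    final SparkSession spark = SparkSession.builder()
        .master("local[*]")
        .appName("aggregate-sketch")
        .getOrCreate();

    // Toy stand-in for the subject resource dataset.
    final Dataset<Row> patients = spark.sql(
        "SELECT 'patient-1' AS id, 'female' AS gender, true AS active "
            + "UNION ALL SELECT 'patient-2', 'male', true "
            + "UNION ALL SELECT 'patient-3', 'female', false");

    // Apply the filters, then group by the grouping expressions and aggregate.
    final Dataset<Row> result = patients
        .filter(col("active"))                   // filter expression: active
        .groupBy(col("gender"))                  // grouping expression: gender
        .agg(count(col("id")).alias("count"));   // aggregation expression: count()

    // Final selection: grouping columns followed by aggregation columns.
    result.select(col("gender"), col("count")).show();
  }
}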
Use of au.csiro.pathling.QueryHelpers.DatasetWithColumnMap in project pathling by aehrc.
The class ResourcePath, method build.
/**
 * Build a new ResourcePath using the supplied {@link FhirContext} and {@link Database}.
 *
 * @param fhirContext the {@link FhirContext} to use for sourcing the resource definition
 * @param database the {@link Database} to use for retrieving the Dataset
 * @param resourceType the type of the resource
 * @param expression the expression to use in the resulting path
 * @param singular whether the resulting path should be flagged as a single item collection
 * @param skipAliasing set to true to skip column aliasing
 * @return A shiny new ResourcePath
 */
@Nonnull
public static ResourcePath build(@Nonnull final FhirContext fhirContext,
    @Nonnull final Database database, @Nonnull final ResourceType resourceType,
    @Nonnull final String expression, final boolean singular, final boolean skipAliasing) {
  // Get the resource definition from HAPI.
  final String resourceCode = resourceType.toCode();
  final RuntimeResourceDefinition hapiDefinition = fhirContext.getResourceDefinition(
      resourceCode);
  final ResourceDefinition definition = new ResourceDefinition(resourceType, hapiDefinition);

  // Retrieve the dataset for the resource type using the supplied resource reader.
  final Dataset<Row> dataset = database.read(resourceType);
  final Column idColumn = col("id");
  final Column finalIdColumn;
  final Dataset<Row> finalDataset;
  final Map<String, Column> elementsToColumns;

  if (skipAliasing) {
    // If aliasing is disabled, the dataset will contain columns with the original element
    // names. This is used for contexts where we need the original column names for encoding
    // (e.g. search).
    finalDataset = dataset;
    finalIdColumn = idColumn;
    elementsToColumns = Stream.of(dataset.columns())
        .collect(Collectors.toMap(Function.identity(), functions::col, (a, b) -> null));
  } else {
    // If aliasing is enabled, all columns in the dataset will be aliased, and the original
    // columns will be dropped. This is to avoid column name clashes when doing joins.
    final DatasetWithColumnMap datasetWithColumnMap = aliasAllColumns(dataset);
    finalDataset = datasetWithColumnMap.getDataset();
    final Map<Column, Column> columnMap = datasetWithColumnMap.getColumnMap();
    elementsToColumns = columnMap.keySet().stream()
        .collect(Collectors.toMap(Column::toString, columnMap::get, (a, b) -> null));
    finalIdColumn = elementsToColumns.get(idColumn.toString());
  }

  // We use the ID column as the value column for a ResourcePath.
  return new ResourcePath(expression, finalDataset, finalIdColumn, Optional.empty(),
      finalIdColumn, singular, Optional.empty(), definition, elementsToColumns);
}
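The aliasing branch is the interesting one. Here is a minimal sketch of the idea, assuming that aliasing simply renames every column to a generated name and records a map from the original name to the aliased Column; Pathling's aliasAllColumns uses its own naming scheme, so the names below are illustrative only.

import static org.apache.spark.sql.functions.col;

import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class AliasSketch {

  public static void main(final String[] args) {
    final SparkSession spark = SparkSession.builder()
        .master("local[*]")
        .appName("alias-sketch")
        .getOrCreate();

    final Dataset<Row> patients = spark.sql("SELECT 'patient-1' AS id, 'female' AS gender");

    // Alias every column to a generated name, keeping a map from the original
    // element name to the aliased column for later lookups.
    final Map<String, Column> elementsToColumns = new LinkedHashMap<>();
    Dataset<Row> aliased = patients;
    int i = 0;
    for (final String name : patients.columns()) {
      final String alias = "c_" + i++;
      aliased = aliased.withColumnRenamed(name, alias);
      elementsToColumns.put(name, col(alias));
    }

    // A join against another dataset can no longer clash on names like "id".
    aliased.show();
    // +---------+------+
    // |      c_0|   c_1|
    // +---------+------+
    // |patient-1|female|
    // +---------+------+
    System.out.println(elementsToColumns);  // {id=c_0, gender=c_1}
  }
}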