
Example 1 with CqlEvaluationRequests

use of com.ibm.cohort.cql.evaluation.CqlEvaluationRequests in project quality-measure-and-cohort-service by Alvearie.

the class SparkCqlEvaluator method calculateSparkSchema.

/**
 * Auto-detect an output schema for 1 or more contexts using program metadata files
 * and the CQL definitions that will be used by the engine.
 *
 * @param contextNames          List of context names to calculate schemas for.
 * @param contextDefinitions    Context definitions used during schema calculation. Used to
 *                              detect the key column for each context.
 * @param encoder               Encoder used to calculate the output column names to use for
 *                              each output schema.
 * @param cqlTranslator         Pre-configured CQL Translator instance
 * @return Map of context name to the output Spark schema for that context. The map will only
 *         contain an entry for each context name that is included in the contextNames list
 *         passed to this function.
 * @throws Exception if deserialization errors occur when reading in any of the input files
 *         or if inferring an output schema fails for any reason.
 */
protected Map<String, StructType> calculateSparkSchema(List<String> contextNames, ContextDefinitions contextDefinitions, SparkOutputColumnEncoder encoder, CqlToElmTranslator cqlTranslator) throws Exception {
    CqlLibraryProvider libProvider = SparkCqlEvaluator.libraryProvider.get();
    if (libProvider == null) {
        libProvider = createLibraryProvider();
        SparkCqlEvaluator.libraryProvider.set(libProvider);
    }
    CqlEvaluationRequests cqlEvaluationRequests = getFilteredJobSpecificationWithIds();
    SparkSchemaCreator sparkSchemaCreator = new SparkSchemaCreator(libProvider, cqlEvaluationRequests, contextDefinitions, encoder, cqlTranslator);
    return sparkSchemaCreator.calculateSchemasForContexts(contextNames);
}
Also used : SparkSchemaCreator(com.ibm.cohort.cql.spark.data.SparkSchemaCreator) CqlLibraryProvider(com.ibm.cohort.cql.library.CqlLibraryProvider) PriorityCqlLibraryProvider(com.ibm.cohort.cql.library.PriorityCqlLibraryProvider) HadoopBasedCqlLibraryProvider(com.ibm.cohort.cql.library.HadoopBasedCqlLibraryProvider) TranslatingCqlLibraryProvider(com.ibm.cohort.cql.translation.TranslatingCqlLibraryProvider) ClasspathCqlLibraryProvider(com.ibm.cohort.cql.library.ClasspathCqlLibraryProvider) CqlEvaluationRequests(com.ibm.cohort.cql.evaluation.CqlEvaluationRequests)
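
A short usage sketch follows. It is hypothetical (not part of the project) and assumes it runs inside a SparkCqlEvaluator subclass or a same-package test with access to the protected method above, with contextDefinitions, encoder, and cqlTranslator already constructed; it simply prints the output columns inferred for each context.

protected void logInferredSchemas(List<String> contextNames, ContextDefinitions contextDefinitions, SparkOutputColumnEncoder encoder, CqlToElmTranslator cqlTranslator) throws Exception {
    // calculateSparkSchema returns one StructType per requested context name
    Map<String, StructType> schemas = calculateSparkSchema(contextNames, contextDefinitions, encoder, cqlTranslator);
    for (Map.Entry<String, StructType> entry : schemas.entrySet()) {
        // fieldNames() lists the columns the output table for this context will contain
        System.out.println(entry.getKey() + " -> " + String.join(", ", entry.getValue().fieldNames()));
    }
}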

Example 2 with CqlEvaluationRequests

use of com.ibm.cohort.cql.evaluation.CqlEvaluationRequests in project quality-measure-and-cohort-service by Alvearie.

the class SparkCqlEvaluator method getFilteredRequests.

/**
 * @param requests     Request object to filter.
 * @param libraries    Map of library id to version used for filtering
 *                     down request based on library id. If this argument
 *                     is null or empty, then no library id filtering
 *                     is performed.
 * @param expressions  Used to optionally override which expressions will
 *                     run for each individual CqlEvaluationRequest. If this
 *                     argument is null or empty, no expressions are overwritten.
 *
 * @return CqlEvaluationRequests with the original requests optionally filtered
 *         based on the provided library ids.
 *         Requests will optionally have their expressions overridden
 *         by args.expressions, if any are provided.
 *         Individual requests will also have any global parameters set on
 *         them; request-level parameters take precedence over global values.
 */
protected CqlEvaluationRequests getFilteredRequests(CqlEvaluationRequests requests, Map<String, String> libraries, Collection<String> expressions) {
    if (requests != null) {
        List<CqlEvaluationRequest> evaluations = requests.getEvaluations();
        if (libraries != null && !libraries.isEmpty()) {
            evaluations = evaluations.stream().filter(r -> libraries.keySet().contains(r.getDescriptor().getLibraryId())).collect(Collectors.toList());
        }
        if (expressions != null && !expressions.isEmpty()) {
            evaluations.forEach(x -> x.setExpressions(x.getExpressions().stream().filter(e -> expressions.contains(e.getName())).collect(Collectors.toSet())));
        }
        if (requests.getGlobalParameters() != null) {
            for (CqlEvaluationRequest evaluation : evaluations) {
                for (Map.Entry<String, Parameter> globalParameter : requests.getGlobalParameters().entrySet()) {
                    Map<String, Parameter> parameters = evaluation.getParameters();
                    if (parameters == null) {
                        evaluation.setParameters(new HashMap<>());
                        parameters = evaluation.getParameters();
                    }
                    parameters.putIfAbsent(globalParameter.getKey(), globalParameter.getValue());
                }
            }
        }
        requests.setEvaluations(evaluations);
        jobSpecification.set(requests);
    }
    return requests;
}
Also used : ModelInfo(org.hl7.elm_modelinfo.r1.ModelInfo) Arrays(java.util.Arrays) CqlDataProvider(com.ibm.cohort.cql.data.CqlDataProvider) CqlToElmTranslator(com.ibm.cohort.cql.translation.CqlToElmTranslator) FileSystem(org.apache.hadoop.fs.FileSystem) ZonedDateTime(java.time.ZonedDateTime) LoggerFactory(org.slf4j.LoggerFactory) EncodedParametersCache(com.ibm.cohort.cql.spark.util.EncodedParametersCache) DataRowDataProvider(com.ibm.cohort.datarow.engine.DataRowDataProvider) Format(com.ibm.cohort.cql.library.Format) ClassInfo(org.hl7.elm_modelinfo.r1.ClassInfo) ColumnRuleCreator(com.ibm.cohort.cql.spark.aggregation.ColumnRuleCreator) ConfigurableOutputColumnNameEncoder(com.ibm.cohort.cql.spark.data.ConfigurableOutputColumnNameEncoder) DefaultDatasetRetriever(com.ibm.cohort.cql.spark.data.DefaultDatasetRetriever) SparkSchemaCreator(com.ibm.cohort.cql.spark.data.SparkSchemaCreator) ContextDefinition(com.ibm.cohort.cql.spark.aggregation.ContextDefinition) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) CqlEvaluationRequest(com.ibm.cohort.cql.evaluation.CqlEvaluationRequest) ModelUtils(com.ibm.cohort.cql.spark.optimizer.ModelUtils) ConstraintViolation(javax.validation.ConstraintViolation) StructType(org.apache.spark.sql.types.StructType) HadoopPathOutputMetadataWriter(com.ibm.cohort.cql.spark.metadata.HadoopPathOutputMetadataWriter) Collection(java.util.Collection) Set(java.util.Set) Validator(javax.validation.Validator) CqlLibraryProvider(com.ibm.cohort.cql.library.CqlLibraryProvider) Reader(java.io.Reader) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) Serializable(java.io.Serializable) ContextRetriever(com.ibm.cohort.cql.spark.aggregation.ContextRetriever) List(java.util.List) CqlEvaluationRequests(com.ibm.cohort.cql.evaluation.CqlEvaluationRequests) CqlTerminologyProvider(com.ibm.cohort.cql.terminology.CqlTerminologyProvider) QName(javax.xml.namespace.QName) SparkOutputColumnEncoder(com.ibm.cohort.cql.spark.data.SparkOutputColumnEncoder) Parameter(com.ibm.cohort.cql.evaluation.parameters.Parameter) MapUtils(com.ibm.cohort.cql.util.MapUtils) ExternalFunctionProvider(org.opencds.cqf.cql.engine.data.ExternalFunctionProvider) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Dataset(org.apache.spark.sql.Dataset) SerializableConfiguration(org.apache.spark.util.SerializableConfiguration) CqlEvaluationResult(com.ibm.cohort.cql.evaluation.CqlEvaluationResult) SparkDataRow(com.ibm.cohort.cql.spark.data.SparkDataRow) PriorityCqlLibraryProvider(com.ibm.cohort.cql.library.PriorityCqlLibraryProvider) R4FileSystemFhirTerminologyProvider(com.ibm.cohort.cql.terminology.R4FileSystemFhirTerminologyProvider) HashMap(java.util.HashMap) ValidatorFactory(javax.validation.ValidatorFactory) HadoopBasedCqlLibraryProvider(com.ibm.cohort.cql.library.HadoopBasedCqlLibraryProvider) SparkTypeConverter(com.ibm.cohort.cql.spark.data.SparkTypeConverter) Function(java.util.function.Function) ArrayList(java.util.ArrayList) CustomMetricSparkPlugin(com.ibm.cohort.cql.spark.metrics.CustomMetricSparkPlugin) CollectionUtils(org.apache.commons.collections.CollectionUtils) EvaluationError(com.ibm.cohort.cql.spark.errors.EvaluationError) DataRowRetrieveProvider(com.ibm.cohort.datarow.engine.DataRowRetrieveProvider) Validation(javax.validation.Validation) TranslatingCqlLibraryProvider(com.ibm.cohort.cql.translation.TranslatingCqlLibraryProvider) NoSuchElementException(java.util.NoSuchElementException) 
EvaluationSummary(com.ibm.cohort.cql.spark.metadata.EvaluationSummary) DataRow(com.ibm.cohort.datarow.model.DataRow) SparkSession(org.apache.spark.sql.SparkSession) PrintStream(java.io.PrintStream) CqlEvaluator(com.ibm.cohort.cql.evaluation.CqlEvaluator) ClasspathCqlLibraryProvider(com.ibm.cohort.cql.library.ClasspathCqlLibraryProvider) SaveMode(org.apache.spark.sql.SaveMode) Logger(org.slf4j.Logger) CqlDebug(com.ibm.cohort.cql.evaluation.CqlDebug) Iterator(java.util.Iterator) RowFactory(org.apache.spark.sql.RowFactory) JCommander(com.beust.jcommander.JCommander) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) IOException(java.io.IOException) Row(org.apache.spark.sql.Row) CollectionAccumulator(org.apache.spark.util.CollectionAccumulator) InputStreamReader(java.io.InputStreamReader) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) LongAccumulator(org.apache.spark.util.LongAccumulator) CqlExpressionConfiguration(com.ibm.cohort.cql.evaluation.CqlExpressionConfiguration) AnyColumnFunctions(com.ibm.cohort.cql.functions.AnyColumnFunctions) MDC(org.slf4j.MDC) UnsupportedTerminologyProvider(com.ibm.cohort.cql.terminology.UnsupportedTerminologyProvider) CohortExternalFunctionProvider(com.ibm.cohort.cql.functions.CohortExternalFunctionProvider) ContextDefinitions(com.ibm.cohort.cql.spark.aggregation.ContextDefinitions) Collections(java.util.Collections) OutputMetadataWriter(com.ibm.cohort.cql.spark.metadata.OutputMetadataWriter) CqlEvaluationRequest(com.ibm.cohort.cql.evaluation.CqlEvaluationRequest) Parameter(com.ibm.cohort.cql.evaluation.parameters.Parameter) Map(java.util.Map) HashMap(java.util.HashMap)
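
The global-parameter loop above relies on putIfAbsent, so a parameter already set on an individual request is never overwritten by a global value of the same name. A minimal standalone sketch of that merge rule (a hypothetical helper, not part of SparkCqlEvaluator):

static Map<String, Parameter> mergeParameters(Map<String, Parameter> requestParameters, Map<String, Parameter> globalParameters) {
    // Start from the request-level parameters; they take precedence
    Map<String, Parameter> merged = requestParameters == null ? new HashMap<>() : new HashMap<>(requestParameters);
    if (globalParameters != null) {
        // putIfAbsent only fills in names the request did not set itself
        globalParameters.forEach(merged::putIfAbsent);
    }
    return merged;
}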

Example 3 with CqlEvaluationRequests

use of com.ibm.cohort.cql.evaluation.CqlEvaluationRequests in project quality-measure-and-cohort-service by Alvearie.

the class SparkCqlEvaluator method evaluate.

/**
 * Evaluate the input CQL for a single context + data pair.
 *
 * @param libraryProvider Library provider providing CQL/ELM content
 * @param termProvider    Terminology provider providing terminology resources
 * @param funProvider     External function provider providing static CQL functions
 * @param contextName     Context name corresponding to the library context key
 *                        currently under evaluation.
 * @param resultsSchema   StructType containing the schema data for the output table
 *                        that will be created.
 * @param rowsByContext   Data for a single evaluation context
 * @param dataTypeAliases Mapping of data type to abstract type
 * @param perContextAccum Spark accumulator that tracks each individual context
 *                        evaluation
 * @param errorAccum      Spark accumulator that tracks CQL evaluation errors
 * @param batchRunTime    Single unified timestamp for all contexts
 * @return Evaluation results for all expressions evaluated keyed by the context
 *         ID. Expression names are automatically namespaced with the
 *         library name (e.g. LibraryName.ExpressionName) to avoid collisions
 *         when the same expression name appears in multiple libraries.
 * @throws Exception on general failure including CQL library loading issues
 */
protected Iterator<Tuple2<Object, Row>> evaluate(CqlLibraryProvider libraryProvider, CqlTerminologyProvider termProvider, ExternalFunctionProvider funProvider, String contextName, StructType resultsSchema, Tuple2<Object, List<Row>> rowsByContext, Map<String, String> dataTypeAliases, LongAccumulator perContextAccum, CollectionAccumulator<EvaluationError> errorAccum, ZonedDateTime batchRunTime) throws Exception {
    // Convert the Spark objects to the cohort Java model
    List<DataRow> datarows = rowsByContext._2().stream().map(getDataRowFactory()).collect(Collectors.toList());
    Map<String, List<Object>> dataByDataType = new HashMap<>();
    for (DataRow datarow : datarows) {
        String dataType = (String) datarow.getValue(ContextRetriever.SOURCE_FACT_IDX);
        List<Object> mappedRows = dataByDataType.computeIfAbsent(dataType, x -> new ArrayList<>());
        mappedRows.add(datarow);
        if (dataTypeAliases.containsKey(dataType)) {
            String mappedType = dataTypeAliases.get(dataType);
            List<Object> aliasedRows = dataByDataType.computeIfAbsent(mappedType, x -> new ArrayList<>());
            aliasedRows.add(datarow);
        }
    }
    DataRowRetrieveProvider retrieveProvider = new DataRowRetrieveProvider(dataByDataType, termProvider);
    CqlDataProvider dataProvider = new DataRowDataProvider(getDataRowClass(), retrieveProvider);
    CqlEvaluator evaluator = new CqlEvaluator().setLibraryProvider(libraryProvider).setDataProvider(dataProvider).setTerminologyProvider(termProvider).setExternalFunctionProvider(funProvider);
    CqlEvaluationRequests requests = getFilteredJobSpecificationWithIds();
    SparkOutputColumnEncoder columnEncoder = getSparkOutputColumnEncoder();
    return evaluate(rowsByContext, contextName, resultsSchema, evaluator, requests, columnEncoder, perContextAccum, errorAccum, batchRunTime);
}
Also used : HashMap(java.util.HashMap) DataRowDataProvider(com.ibm.cohort.datarow.engine.DataRowDataProvider) DataRowRetrieveProvider(com.ibm.cohort.datarow.engine.DataRowRetrieveProvider) SparkDataRow(com.ibm.cohort.cql.spark.data.SparkDataRow) DataRow(com.ibm.cohort.datarow.model.DataRow) SparkOutputColumnEncoder(com.ibm.cohort.cql.spark.data.SparkOutputColumnEncoder) List(java.util.List) ArrayList(java.util.ArrayList) CqlEvaluationRequests(com.ibm.cohort.cql.evaluation.CqlEvaluationRequests) CqlDataProvider(com.ibm.cohort.cql.data.CqlDataProvider) CqlEvaluator(com.ibm.cohort.cql.evaluation.CqlEvaluator)
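
The aliasing loop above can be read in isolation as follows. This is a hypothetical standalone version (not in the project) showing how each row is indexed under its concrete data type and, when an alias is configured, under the abstract type as well, so retrieves against either name see the same data.

static Map<String, List<Object>> indexRowsByDataType(List<DataRow> datarows, Map<String, String> dataTypeAliases) {
    Map<String, List<Object>> dataByDataType = new HashMap<>();
    for (DataRow datarow : datarows) {
        // SOURCE_FACT_IDX holds the name of the source data type for the row
        String dataType = (String) datarow.getValue(ContextRetriever.SOURCE_FACT_IDX);
        dataByDataType.computeIfAbsent(dataType, x -> new ArrayList<>()).add(datarow);
        String mappedType = dataTypeAliases.get(dataType);
        if (mappedType != null) {
            // The same row also becomes visible under its abstract/alias type
            dataByDataType.computeIfAbsent(mappedType, x -> new ArrayList<>()).add(datarow);
        }
    }
    return dataByDataType;
}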

Example 4 with CqlEvaluationRequests

use of com.ibm.cohort.cql.evaluation.CqlEvaluationRequests in project quality-measure-and-cohort-service by Alvearie.

the class SparkCqlEvaluatorTest method testParameterMatrixOutputSimpleSuccess.

@Test
public void testParameterMatrixOutputSimpleSuccess() throws Exception {
    String outputLocation = "target/output/param-matrix/patient_cohort";
    CqlEvaluationRequest template = new CqlEvaluationRequest();
    template.setDescriptor(new CqlLibraryDescriptor().setLibraryId("SampleLibrary").setVersion("1.0.0"));
    template.setExpressionsByNames(Collections.singleton("IsFemale"));
    template.setContextKey("Patient");
    template.setContextValue("NA");
    CqlEvaluationRequests requests = new CqlEvaluationRequests();
    requests.setEvaluations(new ArrayList<>());
    List<Integer> ages = Arrays.asList(15, 17, 18);
    for (Integer age : ages) {
        Map<String, Parameter> parameters = new HashMap<>();
        parameters.put("MinimumAge", new IntegerParameter(age));
        CqlEvaluationRequest request = new CqlEvaluationRequest(template);
        request.setParameters(parameters);
        requests.getEvaluations().add(request);
    }
    ObjectMapper om = new ObjectMapper();
    File jobsFile = new File("target/output/param-matrix-simple/cql-jobs.json");
    if (!jobsFile.exists()) {
        jobsFile.getParentFile().mkdirs();
    }
    FileUtils.write(jobsFile, om.writeValueAsString(requests), StandardCharsets.UTF_8);
    try {
        String[] args = new String[] { "-d", "src/test/resources/simple-job/context-definitions.json", "-j", jobsFile.getPath(), "-m", "src/test/resources/simple-job/modelinfo/simple-modelinfo-1.0.0.xml", "-c", "src/test/resources/simple-job/cql", "-i", "Patient=" + new File("src/test/resources/simple-job/testdata/patient").toURI().toString(), "-o", "Patient=" + new File(outputLocation).toURI().toString(), "--output-format", "delta", "--overwrite-output-for-contexts", "--metadata-output-path", outputLocation };
        SparkCqlEvaluator.main(args);
        validateOutputCountsAndColumns(outputLocation, new HashSet<>(Arrays.asList("id", "parameters", "SampleLibrary|IsFemale")), 10 * ages.size(), "delta");
    } finally {
        jobsFile.delete();
    }
}
Also used : IntegerParameter(com.ibm.cohort.cql.evaluation.parameters.IntegerParameter) HashMap(java.util.HashMap) CqlEvaluationRequest(com.ibm.cohort.cql.evaluation.CqlEvaluationRequest) IntegerParameter(com.ibm.cohort.cql.evaluation.parameters.IntegerParameter) DateParameter(com.ibm.cohort.cql.evaluation.parameters.DateParameter) Parameter(com.ibm.cohort.cql.evaluation.parameters.Parameter) IntervalParameter(com.ibm.cohort.cql.evaluation.parameters.IntervalParameter) DecimalParameter(com.ibm.cohort.cql.evaluation.parameters.DecimalParameter) StringParameter(com.ibm.cohort.cql.evaluation.parameters.StringParameter) CqlEvaluationRequests(com.ibm.cohort.cql.evaluation.CqlEvaluationRequests) CqlLibraryDescriptor(com.ibm.cohort.cql.library.CqlLibraryDescriptor) File(java.io.File) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)
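
Since the test serializes the request model with Jackson, the generated cql-jobs.json can be read back the same way. A minimal sketch, assuming the CqlEvaluationRequests POJO round-trips cleanly and the file still exists at the path used above:

ObjectMapper om = new ObjectMapper();
CqlEvaluationRequests parsed = om.readValue(new File("target/output/param-matrix-simple/cql-jobs.json"), CqlEvaluationRequests.class);
// One evaluation per MinimumAge value in the parameter matrix
for (CqlEvaluationRequest r : parsed.getEvaluations()) {
    System.out.println(r.getDescriptor().getLibraryId() + " " + r.getParameters());
}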

Example 5 with CqlEvaluationRequests

use of com.ibm.cohort.cql.evaluation.CqlEvaluationRequests in project quality-measure-and-cohort-service by Alvearie.

the class SparkCqlEvaluatorTest method testGetFilteredRequestsFilterToLibrariesIgnoresVersion.

@Test
public void testGetFilteredRequestsFilterToLibrariesIgnoresVersion() {
    CqlEvaluationRequests requests = new CqlEvaluationRequests();
    CqlEvaluationRequest request = makeEvaluationRequest("context", "lib1", "1.0.0");
    request.setExpressionsByNames(new HashSet<>(Collections.singletonList("cohort")));
    CqlEvaluationRequest request2 = makeEvaluationRequest("context", "lib2", "1.0.0");
    request2.setExpressionsByNames(new HashSet<>(Collections.singletonList("cohort")));
    CqlEvaluationRequest request3 = makeEvaluationRequest("context", "lib3", "1.0.0");
    request3.setExpressionsByNames(new HashSet<>(Collections.singletonList("cohort")));
    CqlEvaluationRequest request4 = makeEvaluationRequest("context", "lib4", "1.0.0");
    request4.setExpressionsByNames(new HashSet<>(Collections.singletonList("cohort")));
    List<CqlEvaluationRequest> evaluations = Arrays.asList(request, request2, request3, request4);
    requests.setEvaluations(evaluations);
    Map<String, String> libs = new HashMap<String, String>() {

        {
            put("lib3", "7.0.0");
            put("lib4", "1.0.0");
        }
    };
    CqlEvaluationRequests actual = evaluator.getFilteredRequests(requests, libs, null);
    assertEquals(2, actual.getEvaluations().size());
    for (CqlEvaluationRequest cqlEvaluationRequest : actual.getEvaluations()) {
        assertTrue(libs.containsKey(cqlEvaluationRequest.getDescriptor().getLibraryId()));
    }
}
Also used : HashMap(java.util.HashMap) CqlEvaluationRequest(com.ibm.cohort.cql.evaluation.CqlEvaluationRequest) CqlEvaluationRequests(com.ibm.cohort.cql.evaluation.CqlEvaluationRequests) Test(org.junit.Test)
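
The test above exercises only the library-id filter. A companion sketch for the expressions argument (hypothetical, not from the project, reusing the same makeEvaluationRequest helper and evaluator field) could look like this:

@Test
public void testGetFilteredRequestsOverridesExpressions() {
    CqlEvaluationRequests requests = new CqlEvaluationRequests();
    CqlEvaluationRequest request = makeEvaluationRequest("context", "lib1", "1.0.0");
    request.setExpressionsByNames(new HashSet<>(Arrays.asList("cohort", "extraExpression")));
    requests.setEvaluations(Collections.singletonList(request));
    // Passing an expressions collection trims each request down to the named expressions
    CqlEvaluationRequests actual = evaluator.getFilteredRequests(requests, null, Collections.singleton("cohort"));
    assertEquals(1, actual.getEvaluations().size());
    assertEquals(1, actual.getEvaluations().get(0).getExpressions().size());
}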

Aggregations

CqlEvaluationRequests (com.ibm.cohort.cql.evaluation.CqlEvaluationRequests)34 Test (org.junit.Test)27 CqlLibraryDescriptor (com.ibm.cohort.cql.library.CqlLibraryDescriptor)17 CqlEvaluationRequest (com.ibm.cohort.cql.evaluation.CqlEvaluationRequest)14 HashMap (java.util.HashMap)13 ContextDefinitions (com.ibm.cohort.cql.spark.aggregation.ContextDefinitions)10 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)9 File (java.io.File)9 CqlExpressionConfiguration (com.ibm.cohort.cql.evaluation.CqlExpressionConfiguration)8 DateParameter (com.ibm.cohort.cql.evaluation.parameters.DateParameter)6 IntegerParameter (com.ibm.cohort.cql.evaluation.parameters.IntegerParameter)6 IntervalParameter (com.ibm.cohort.cql.evaluation.parameters.IntervalParameter)6 Parameter (com.ibm.cohort.cql.evaluation.parameters.Parameter)6 DecimalParameter (com.ibm.cohort.cql.evaluation.parameters.DecimalParameter)5 StringParameter (com.ibm.cohort.cql.evaluation.parameters.StringParameter)5 ClasspathCqlLibraryProvider (com.ibm.cohort.cql.library.ClasspathCqlLibraryProvider)5 TranslatingCqlLibraryProvider (com.ibm.cohort.cql.translation.TranslatingCqlLibraryProvider)5 CqlLibraryProvider (com.ibm.cohort.cql.library.CqlLibraryProvider)4 CqlToElmTranslator (com.ibm.cohort.cql.translation.CqlToElmTranslator)4 Set (java.util.Set)4