Search in sources :

Example 1 with DataRowDataProvider

Use of com.ibm.cohort.datarow.engine.DataRowDataProvider in the project quality-measure-and-cohort-service by Alvearie.

From the class SparkCqlEvaluator, method evaluate.

/**
 * Runs the CQL evaluation for one context value and the rows that belong to it.
 *
 * <p>The Spark rows are first converted to {@code DataRow} objects using
 * {@code getDataRowFactory()}. Each converted row is then indexed by the value
 * found at {@code ContextRetriever.SOURCE_FACT_IDX} (its data type) and, when
 * {@code dataTypeAliases} has an entry for that data type, also under the aliased
 * type. The resulting index backs the retrieve/data providers handed to a
 * {@code CqlEvaluator}, and the actual work is delegated to the overloaded
 * {@code evaluate} method.
 *
 * @param libraryProvider Source of the CQL/ELM library content
 * @param termProvider    Source of terminology resources
 * @param funProvider     Source of static external CQL functions
 * @param contextName     Name of the context that corresponds to the library
 *                        context key being evaluated
 * @param resultsSchema   Schema of the output table that will be produced
 * @param rowsByContext   Pair of context ID and the rows for that single
 *                        evaluation context
 * @param dataTypeAliases Mapping from a data type to its abstract type
 * @param perContextAccum Spark accumulator tracking each individual context
 *                        evaluation
 * @param errorAccum      Spark accumulator tracking CQL evaluation errors
 * @param batchRunTime    Single timestamp shared by all contexts
 * @return Results of every evaluated expression, keyed by context ID. Expression
 *         names are namespaced by library name (e.g. LibraryName.ExpressionName)
 *         so that identically named expressions in different libraries do not
 *         collide.
 * @throws Exception on general failure, including problems loading CQL libraries
 */
protected Iterator<Tuple2<Object, Row>> evaluate(
        CqlLibraryProvider libraryProvider,
        CqlTerminologyProvider termProvider,
        ExternalFunctionProvider funProvider,
        String contextName,
        StructType resultsSchema,
        Tuple2<Object, List<Row>> rowsByContext,
        Map<String, String> dataTypeAliases,
        LongAccumulator perContextAccum,
        CollectionAccumulator<EvaluationError> errorAccum,
        ZonedDateTime batchRunTime) throws Exception {
    // Translate the Spark rows into the cohort Java model.
    List<DataRow> convertedRows = rowsByContext._2()
            .stream()
            .map(getDataRowFactory())
            .collect(Collectors.toList());

    // Index every row by its own data type and, if one is configured, by its alias.
    Map<String, List<Object>> rowsByType = new HashMap<>();
    for (DataRow row : convertedRows) {
        String sourceType = (String) row.getValue(ContextRetriever.SOURCE_FACT_IDX);
        rowsByType.computeIfAbsent(sourceType, key -> new ArrayList<>()).add(row);

        if (!dataTypeAliases.containsKey(sourceType)) {
            continue;
        }
        String aliasType = dataTypeAliases.get(sourceType);
        rowsByType.computeIfAbsent(aliasType, key -> new ArrayList<>()).add(row);
    }

    // Wire the indexed rows into the CQL engine.
    DataRowRetrieveProvider rowRetriever = new DataRowRetrieveProvider(rowsByType, termProvider);
    CqlDataProvider rowDataProvider = new DataRowDataProvider(getDataRowClass(), rowRetriever);
    CqlEvaluator cqlEvaluator = new CqlEvaluator()
            .setLibraryProvider(libraryProvider)
            .setDataProvider(rowDataProvider)
            .setTerminologyProvider(termProvider)
            .setExternalFunctionProvider(funProvider);

    CqlEvaluationRequests jobRequests = getFilteredJobSpecificationWithIds();
    SparkOutputColumnEncoder outputEncoder = getSparkOutputColumnEncoder();

    return evaluate(
            rowsByContext,
            contextName,
            resultsSchema,
            cqlEvaluator,
            jobRequests,
            outputEncoder,
            perContextAccum,
            errorAccum,
            batchRunTime);
}
Also used : HashMap(java.util.HashMap) DataRowDataProvider(com.ibm.cohort.datarow.engine.DataRowDataProvider) DataRowRetrieveProvider(com.ibm.cohort.datarow.engine.DataRowRetrieveProvider) SparkDataRow(com.ibm.cohort.cql.spark.data.SparkDataRow) DataRow(com.ibm.cohort.datarow.model.DataRow) SparkOutputColumnEncoder(com.ibm.cohort.cql.spark.data.SparkOutputColumnEncoder) List(java.util.List) ArrayList(java.util.ArrayList) CqlEvaluationRequests(com.ibm.cohort.cql.evaluation.CqlEvaluationRequests) CqlDataProvider(com.ibm.cohort.cql.data.CqlDataProvider) CqlEvaluator(com.ibm.cohort.cql.evaluation.CqlEvaluator)

Aggregations

CqlDataProvider (com.ibm.cohort.cql.data.CqlDataProvider)1 CqlEvaluationRequests (com.ibm.cohort.cql.evaluation.CqlEvaluationRequests)1 CqlEvaluator (com.ibm.cohort.cql.evaluation.CqlEvaluator)1 SparkDataRow (com.ibm.cohort.cql.spark.data.SparkDataRow)1 SparkOutputColumnEncoder (com.ibm.cohort.cql.spark.data.SparkOutputColumnEncoder)1 DataRowDataProvider (com.ibm.cohort.datarow.engine.DataRowDataProvider)1 DataRowRetrieveProvider (com.ibm.cohort.datarow.engine.DataRowRetrieveProvider)1 DataRow (com.ibm.cohort.datarow.model.DataRow)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1