Use of com.ibm.cohort.datarow.engine.DataRowDataProvider in the project quality-measure-and-cohort-service by Alvearie.
From the class SparkCqlEvaluator, the method evaluate.
/**
 * Evaluate the input CQL for a single context + data pair.
 *
 * @param libraryProvider Library provider that supplies CQL/ELM content
 * @param termProvider Terminology provider that supplies terminology resources
 * @param funProvider External function provider that supplies static CQL functions
 * @param contextName Context name corresponding to the library context key
 *                    currently under evaluation
 * @param resultsSchema StructType containing the schema for the output table
 *                      that will be created
 * @param rowsByContext Data for a single evaluation context
 * @param dataTypeAliases Mapping of data type to abstract type
 * @param perContextAccum Spark accumulator that tracks each individual context
 *                        evaluation
 * @param errorAccum Spark accumulator that tracks CQL evaluation errors
 * @param batchRunTime Single unified timestamp for all contexts
 * @return Evaluation results for all evaluated expressions, keyed by context
 *         ID. Expression names are automatically namespaced with the library
 *         name (e.g. LibraryName.ExpressionName) to avoid collisions when
 *         expression names match between libraries.
 * @throws Exception on general failure, including CQL library loading issues
 */
protected Iterator<Tuple2<Object, Row>> evaluate(CqlLibraryProvider libraryProvider,
        CqlTerminologyProvider termProvider, ExternalFunctionProvider funProvider, String contextName,
        StructType resultsSchema, Tuple2<Object, List<Row>> rowsByContext, Map<String, String> dataTypeAliases,
        LongAccumulator perContextAccum, CollectionAccumulator<EvaluationError> errorAccum,
        ZonedDateTime batchRunTime) throws Exception {
    // Convert the Spark rows for this context into the cohort Java model
    List<DataRow> datarows = rowsByContext._2().stream().map(getDataRowFactory()).collect(Collectors.toList());

    // Group the rows by their source data type. When an alias is configured for a
    // data type, the same row is also indexed under the aliased (abstract) type so
    // that retrieves against either name find it.
    Map<String, List<Object>> dataByDataType = new HashMap<>();
    for (DataRow datarow : datarows) {
        String dataType = (String) datarow.getValue(ContextRetriever.SOURCE_FACT_IDX);
        List<Object> mappedRows = dataByDataType.computeIfAbsent(dataType, x -> new ArrayList<>());
        mappedRows.add(datarow);
        if (dataTypeAliases.containsKey(dataType)) {
            String mappedType = dataTypeAliases.get(dataType);
            List<Object> aliasedRows = dataByDataType.computeIfAbsent(mappedType, x -> new ArrayList<>());
            aliasedRows.add(datarow);
        }
    }

    // Wire the grouped data into the CQL engine's retrieve/data provider chain
    DataRowRetrieveProvider retrieveProvider = new DataRowRetrieveProvider(dataByDataType, termProvider);
    CqlDataProvider dataProvider = new DataRowDataProvider(getDataRowClass(), retrieveProvider);

    // Assemble the evaluator from the supplied providers
    CqlEvaluator evaluator = new CqlEvaluator()
            .setLibraryProvider(libraryProvider)
            .setDataProvider(dataProvider)
            .setTerminologyProvider(termProvider)
            .setExternalFunctionProvider(funProvider);

    CqlEvaluationRequests requests = getFilteredJobSpecificationWithIds();
    SparkOutputColumnEncoder columnEncoder = getSparkOutputColumnEncoder();

    // Delegate to the per-context evaluation overload
    return evaluate(rowsByContext, contextName, resultsSchema, evaluator, requests, columnEncoder,
            perContextAccum, errorAccum, batchRunTime);
}
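For illustration, here is a minimal, self-contained sketch of the alias fan-out performed by the grouping loop above. The SimpleDataRow interface, the SOURCE_FACT_IDX constant value, and the sample rows are hypothetical stand-ins for the project's DataRow model and ContextRetriever.SOURCE_FACT_IDX; only the grouping logic mirrors the method body.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Hypothetical stand-in for the project's DataRow type; only the lookup used
// by the grouping loop is modeled here.
interface SimpleDataRow {
    Object getValue(String fieldName);
}

public class AliasFanOutSketch {
    // Assumed sentinel column name; the real value lives in ContextRetriever.SOURCE_FACT_IDX
    static final String SOURCE_FACT_IDX = "__SOURCE_FACT";

    public static void main(String[] args) {
        // One alias: rows of type "PatientV2" should also satisfy retrieves of "Patient"
        Map<String, String> dataTypeAliases = Map.of("PatientV2", "Patient");

        List<SimpleDataRow> datarows = List.of(
                field -> "PatientV2",   // a row whose source fact type is PatientV2
                field -> "Encounter");  // a row whose source fact type is Encounter

        Map<String, List<Object>> dataByDataType = new HashMap<>();
        for (SimpleDataRow datarow : datarows) {
            String dataType = (String) datarow.getValue(SOURCE_FACT_IDX);
            dataByDataType.computeIfAbsent(dataType, x -> new ArrayList<>()).add(datarow);
            // Fan-out: the same row is indexed under its alias as well, so a CQL
            // retrieve against either name sees it
            String mappedType = dataTypeAliases.get(dataType);
            if (mappedType != null) {
                dataByDataType.computeIfAbsent(mappedType, x -> new ArrayList<>()).add(datarow);
            }
        }

        // Prints the keys PatientV2, Patient, and Encounter (iteration order may
        // vary): the PatientV2 row is reachable under both its own type and its alias
        System.out.println(dataByDataType.keySet());
    }
}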
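The namespacing rule described in the @return note can be made concrete with a small sketch. The namespacedColumn helper below is a hypothetical illustration of the LibraryName.ExpressionName convention, not the actual SparkOutputColumnEncoder API.

public class ColumnNameSketch {
    // Hypothetical helper: prefix an expression name with its library name so
    // that identically named expressions from different libraries do not collide
    static String namespacedColumn(String libraryName, String expressionName) {
        return libraryName + "." + expressionName;
    }

    public static void main(String[] args) {
        // Two libraries both defining "Numerator" yield distinct output columns
        System.out.println(namespacedColumn("MeasureA", "Numerator")); // MeasureA.Numerator
        System.out.println(namespacedColumn("MeasureB", "Numerator")); // MeasureB.Numerator
    }
}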