Use of com.ibm.cohort.datarow.model.DataRow in project quality-measure-and-cohort-service by Alvearie.
The class AnyColumnFunctions, method AnyColumnRegex.
public static Object AnyColumnRegex(Object object, String regex) {
    DataRow dataRow = (DataRow) object;
    StringMatcher matcher = new RegexStringMatcher(regex);
    return dataRow.getFieldNames().stream().filter(matcher).map(dataRow::getValue).collect(Collectors.toList());
}
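For context, a minimal sketch of calling AnyColumnRegex directly from Java. It assumes DataRow declares only the two accessors used above (getFieldNames() and getValue(String)), that the snippet lives in the same package as AnyColumnFunctions, and that the map-backed mapBackedRow helper is purely illustrative rather than part of the project.

import java.util.List;
import java.util.Map;
import java.util.Set;

import com.ibm.cohort.datarow.model.DataRow;

public class AnyColumnRegexSketch {
    // Hypothetical helper: wraps a Map as a DataRow for demonstration purposes only.
    static DataRow mapBackedRow(Map<String, Object> fields) {
        return new DataRow() {
            @Override
            public Set<String> getFieldNames() {
                return fields.keySet();
            }

            @Override
            public Object getValue(String fieldName) {
                return fields.get(fieldName);
            }
        };
    }

    public static void main(String[] args) {
        DataRow row = mapBackedRow(Map.of("code1", "I10", "code2", "E11.9", "display", "Hypertension"));

        // Collects the values of every field whose name matches the regular expression.
        List<Object> codes = (List<Object>) AnyColumnFunctions.AnyColumnRegex(row, "code[0-9]+");
        System.out.println(codes); // [I10, E11.9] in some order; ordering follows the field-name Set iteration
    }
}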
Use of com.ibm.cohort.datarow.model.DataRow in project quality-measure-and-cohort-service by Alvearie.
The class AnyColumnFunctions, method AnyColumn.
public static Object AnyColumn(Object object, String fieldPrefix) {
    DataRow dataRow = (DataRow) object;
    StringMatcher matcher = new PrefixStringMatcher(fieldPrefix);
    return dataRow.getFieldNames().stream().filter(matcher).map(dataRow::getValue).collect(Collectors.toList());
}
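The prefix variant can be exercised the same way. This sketch reuses the hypothetical mapBackedRow helper from the previous example and makes the same assumptions about the DataRow interface and package layout.

import java.util.List;
import java.util.Map;

import com.ibm.cohort.datarow.model.DataRow;

public class AnyColumnSketch {
    public static void main(String[] args) {
        DataRow row = AnyColumnRegexSketch.mapBackedRow(
                Map.of("code1", "I10", "code2", "E11.9", "display", "Hypertension"));

        // Collects the values of every field whose name starts with the given prefix.
        List<Object> codes = (List<Object>) AnyColumnFunctions.AnyColumn(row, "code");
        System.out.println(codes); // [I10, E11.9] in some order
    }
}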
Use of com.ibm.cohort.datarow.model.DataRow in project quality-measure-and-cohort-service by Alvearie.
The class SparkCqlEvaluator, method evaluate.
/**
 * Evaluate the input CQL for a single context + data pair.
 *
 * @param libraryProvider Library provider providing CQL/ELM content
 * @param termProvider    Terminology provider providing terminology resources
 * @param funProvider     External function provider providing static CQL functions
 * @param contextName     Context name corresponding to the library context key
 *                        currently under evaluation.
 * @param resultsSchema   StructType containing the schema data for the output table
 *                        that will be created.
 * @param rowsByContext   Data for a single evaluation context
 * @param dataTypeAliases Mapping of data type to abstract type
 * @param perContextAccum Spark accumulator that tracks each individual context
 *                        evaluation
 * @param errorAccum      Spark accumulator that tracks CQL evaluation errors
 * @param batchRunTime    Single unified timestamp for all contexts
 * @return Evaluation results for all expressions evaluated, keyed by the context
 *         ID. Expression names are automatically namespaced according to the
 *         library name to avoid issues arising from expression names matching
 *         between libraries (e.g. LibraryName.ExpressionName).
 * @throws Exception on general failure, including CQL library loading issues
 */
protected Iterator<Tuple2<Object, Row>> evaluate(CqlLibraryProvider libraryProvider, CqlTerminologyProvider termProvider,
        ExternalFunctionProvider funProvider, String contextName, StructType resultsSchema,
        Tuple2<Object, List<Row>> rowsByContext, Map<String, String> dataTypeAliases,
        LongAccumulator perContextAccum, CollectionAccumulator<EvaluationError> errorAccum,
        ZonedDateTime batchRunTime) throws Exception {
    // Convert the Spark objects to the cohort Java model
    List<DataRow> datarows = rowsByContext._2().stream().map(getDataRowFactory()).collect(Collectors.toList());

    Map<String, List<Object>> dataByDataType = new HashMap<>();
    for (DataRow datarow : datarows) {
        String dataType = (String) datarow.getValue(ContextRetriever.SOURCE_FACT_IDX);
        List<Object> mappedRows = dataByDataType.computeIfAbsent(dataType, x -> new ArrayList<>());
        mappedRows.add(datarow);
        if (dataTypeAliases.containsKey(dataType)) {
            String mappedType = dataTypeAliases.get(dataType);
            List<Object> aliasedRows = dataByDataType.computeIfAbsent(mappedType, x -> new ArrayList<>());
            aliasedRows.add(datarow);
        }
    }

    DataRowRetrieveProvider retrieveProvider = new DataRowRetrieveProvider(dataByDataType, termProvider);
    CqlDataProvider dataProvider = new DataRowDataProvider(getDataRowClass(), retrieveProvider);

    CqlEvaluator evaluator = new CqlEvaluator()
            .setLibraryProvider(libraryProvider)
            .setDataProvider(dataProvider)
            .setTerminologyProvider(termProvider)
            .setExternalFunctionProvider(funProvider);

    CqlEvaluationRequests requests = getFilteredJobSpecificationWithIds();
    SparkOutputColumnEncoder columnEncoder = getSparkOutputColumnEncoder();

    return evaluate(rowsByContext, contextName, resultsSchema, evaluator, requests, columnEncoder, perContextAccum,
            errorAccum, batchRunTime);
}
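A worked illustration of the dataTypeAliases handling above: a row whose source data type has an alias is added to both buckets, so a retrieve against either the concrete type or the aliased abstract type will see it. The type names and the string standing in for a DataRow are made up for this sketch.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class AliasGroupingSketch {
    public static void main(String[] args) {
        // Hypothetical alias: rows of concrete type "PatientV1" should also satisfy
        // retrieves that ask for the abstract type "Patient".
        Map<String, String> dataTypeAliases = Map.of("PatientV1", "Patient");

        Map<String, List<Object>> dataByDataType = new HashMap<>();
        Object row = "row-123";        // stands in for a DataRow instance
        String dataType = "PatientV1"; // stands in for the value read via ContextRetriever.SOURCE_FACT_IDX

        dataByDataType.computeIfAbsent(dataType, x -> new ArrayList<>()).add(row);
        if (dataTypeAliases.containsKey(dataType)) {
            dataByDataType.computeIfAbsent(dataTypeAliases.get(dataType), x -> new ArrayList<>()).add(row);
        }

        // The same row instance is now reachable under both keys.
        System.out.println(dataByDataType.keySet()); // [PatientV1, Patient] (order not guaranteed)
    }
}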
Use of com.ibm.cohort.datarow.model.DataRow in project quality-measure-and-cohort-service by Alvearie.
The class AnyColumnFunctionsTest, method testAnyColumnRegexNoMatches.
@Test
public void testAnyColumnRegexNoMatches() {
    DataRow row = spy(DataRow.class);

    Set<String> allFields = new HashSet<>();
    allFields.add("matchingField1");
    allFields.add("matchingField2");
    allFields.add("nonMatchingField");

    doReturn(allFields).when(row).getFieldNames();
    doReturn("matchingValue1").when(row).getValue("matchingField1");
    doReturn("matchingValue2").when(row).getValue("matchingField2");
    doReturn("nonMatchingValue").when(row).getValue("nonMatchingField");

    String regex = "prefix[0-9]+";
    List<Object> actual = (List<Object>) AnyColumnFunctions.AnyColumnRegex(row, regex);

    assertThat(actual, empty());
}
Use of com.ibm.cohort.datarow.model.DataRow in project quality-measure-and-cohort-service by Alvearie.
The class AnyColumnFunctionsTest, method testAnyColumnHasMatches.
@Test
public void testAnyColumnHasMatches() {
    String matchingField1 = "matchingField1";
    String matchingField2 = "matchingField2";
    String nonMatchingField = "nonMatchingField";
    String expectedValue1 = "matchingValue1";
    String expectedValue2 = "matchingValue2";

    DataRow row = spy(DataRow.class);

    Set<String> allFields = new HashSet<>();
    allFields.add(matchingField1);
    allFields.add(matchingField2);
    allFields.add(nonMatchingField);

    doReturn(allFields).when(row).getFieldNames();
    doReturn(expectedValue1).when(row).getValue(matchingField1);
    doReturn(expectedValue2).when(row).getValue(matchingField2);
    doReturn("nonMatchingValue").when(row).getValue(nonMatchingField);

    String prefix = "matchingField";
    List<Object> actual = (List<Object>) AnyColumnFunctions.AnyColumn(row, prefix);

    assertThat(actual, containsInAnyOrder(expectedValue1, expectedValue2));
}