Search in sources :

Example 1 with EqualsStringMatcher

use of com.ibm.cohort.cql.util.EqualsStringMatcher in project quality-measure-and-cohort-service by Alvearie.

In class ColumnRuleCreator, method getDataRequirementsForContext:

/**
 * Build the combined column requirements, keyed by data type, for every CQL job
 * held by this instance, and extend them with the columns needed to assemble the
 * supplied aggregation context.
 *
 * <p>Beyond the columns reported by the {@link DataTypeRequirementsProcessor}, the
 * result always contains the context's primary key column under the primary data
 * type. For each relationship whose related data type is already required by the
 * CQL, the join key columns (and any association / chained "with" columns) are
 * added as well. Finally, every data type in the result receives
 * {@code ContextRetriever.SOURCE_FACT_IDX}.
 *
 * @param context ContextDefinition whose primary data type and relationships drive
 *                the additional key columns
 * @return mutable map from data type name to the set of column matchers required
 *         for that data type
 */
public Map<String, Set<StringMatcher>> getDataRequirementsForContext(ContextDefinition context) {
    // Group the requested expression names by the library that declares them.
    Map<CqlLibraryDescriptor, Set<String>> namesByLibrary = new HashMap<>();
    for (CqlEvaluationRequest request : requests) {
        Set<String> names = namesByLibrary.computeIfAbsent(request.getDescriptor(), unused -> new HashSet<>());
        request.getExpressions().stream().map(expression -> expression.getName()).forEach(names::add);
    }

    // Collect the requirements of each library and fold them into a single map.
    DataTypeRequirementsProcessor processor = new DataTypeRequirementsProcessor(cqlTranslator);
    Map<String, Set<StringMatcher>> result = new HashMap<>();
    for (Map.Entry<CqlLibraryDescriptor, Set<String>> libraryEntry : namesByLibrary.entrySet()) {
        CqlLibraryDescriptor descriptor = libraryEntry.getKey();
        LOG.debug("Extracting data requirements for {}", descriptor);
        DataTypeRequirementsProcessor.DataTypeRequirements requirements = processor.getDataRequirements(libraryProvider, descriptor, libraryEntry.getValue());
        for (Map.Entry<String, Set<StringMatcher>> found : requirements.allAsStringMatcher().entrySet()) {
            // When the data type is already present, extend the existing set in place.
            result.merge(found.getKey(), found.getValue(), (accumulated, incoming) -> {
                accumulated.addAll(incoming);
                return accumulated;
            });
        }
    }

    // The primary data type is always present and always carries its primary key.
    Set<StringMatcher> primaryFields = result.computeIfAbsent(context.getPrimaryDataType(), unused -> new HashSet<>());
    primaryFields.add(new EqualsStringMatcher(context.getPrimaryKeyColumn()));

    if (context.getRelationships() != null) {
        for (Join join : context.getRelationships()) {
            Set<StringMatcher> relatedFields = result.get(join.getRelatedDataType());
            if (relatedFields == null) {
                // The related data type is not used by any CQL job, so the join adds nothing.
                continue;
            }

            relatedFields.add(new EqualsStringMatcher(join.getRelatedKeyColumn()));
            relatedFields.add(new EqualsStringMatcher(ContextRetriever.JOIN_CONTEXT_VALUE_IDX));

            // A join that does not go through the primary key needs its alternate key column too.
            if (join.getPrimaryDataTypeColumn() != null) {
                primaryFields.add(new EqualsStringMatcher(join.getPrimaryDataTypeColumn()));
            }

            if (join instanceof ManyToMany) {
                ManyToMany association = (ManyToMany) join;
                Set<StringMatcher> associationFields = result.computeIfAbsent(association.getAssociationDataType(), unused -> new HashSet<>());
                associationFields.add(new EqualsStringMatcher(association.getAssociationOneKeyColumn()));
                associationFields.add(new EqualsStringMatcher(association.getAssociationManyKeyColumn()));
            }

            if (join instanceof MultiManyToMany) {
                // Walk the chain of nested "with" joins until a link has no successor.
                ManyToMany link = ((MultiManyToMany) join).getWith();
                while (link != null) {
                    Set<StringMatcher> linkFields = result.computeIfAbsent(link.getRelatedDataType(), unused -> new HashSet<>());
                    linkFields.add(new EqualsStringMatcher(link.getRelatedKeyColumn()));
                    linkFields.add(new EqualsStringMatcher(ContextRetriever.JOIN_CONTEXT_VALUE_IDX));
                    if (link instanceof MultiManyToMany) {
                        link = ((MultiManyToMany) link).getWith();
                    } else {
                        link = null;
                    }
                }
            }
        }
    }

    // Every data type that made it into the result also needs the source fact index column.
    for (Set<StringMatcher> matchers : result.values()) {
        matchers.add(new EqualsStringMatcher(ContextRetriever.SOURCE_FACT_IDX));
    }
    return result;
}
Also used : EqualsStringMatcher(com.ibm.cohort.cql.util.EqualsStringMatcher) Logger(org.slf4j.Logger) CqlToElmTranslator(com.ibm.cohort.cql.translation.CqlToElmTranslator) DataTypeRequirementsProcessor(com.ibm.cohort.cql.spark.optimizer.DataTypeRequirementsProcessor) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) CqlLibraryProvider(com.ibm.cohort.cql.library.CqlLibraryProvider) HashMap(java.util.HashMap) CqlLibraryDescriptor(com.ibm.cohort.cql.library.CqlLibraryDescriptor) HashSet(java.util.HashSet) StringMatcher(com.ibm.cohort.cql.util.StringMatcher) List(java.util.List) Map(java.util.Map) CqlEvaluationRequest(com.ibm.cohort.cql.evaluation.CqlEvaluationRequest) Set(java.util.Set) HashSet(java.util.HashSet) DataTypeRequirementsProcessor(com.ibm.cohort.cql.spark.optimizer.DataTypeRequirementsProcessor) HashMap(java.util.HashMap) CqlEvaluationRequest(com.ibm.cohort.cql.evaluation.CqlEvaluationRequest) EqualsStringMatcher(com.ibm.cohort.cql.util.EqualsStringMatcher) StringMatcher(com.ibm.cohort.cql.util.StringMatcher) EqualsStringMatcher(com.ibm.cohort.cql.util.EqualsStringMatcher) CqlLibraryDescriptor(com.ibm.cohort.cql.library.CqlLibraryDescriptor) HashMap(java.util.HashMap) Map(java.util.Map)

Example 2 with EqualsStringMatcher

use of com.ibm.cohort.cql.util.EqualsStringMatcher in project quality-measure-and-cohort-service by Alvearie.

In class ColumnRuleCreatorTest, method testGetFiltersForContextOnlyJoinColumns:

// Checks the requirements computed for the "Patient" context when the jobs in
// join-only.json are combined with context-definitions-related-column.json:
// data types A, B and C are each expected with the matchers listed below.
@Test
public void testGetFiltersForContextOnlyJoinColumns() throws Exception {
    ObjectMapper jsonMapper = new ObjectMapper();

    CqlToElmTranslator translator = new CqlToElmTranslator();
    translator.registerModelInfo(new File("src/test/resources/alltypes/modelinfo/alltypes-modelinfo-1.0.0.xml"));

    CqlEvaluationRequests jobs = jsonMapper.readValue(new File("src/test/resources/alltypes/metadata/join-only.json"), CqlEvaluationRequests.class);
    CqlLibraryProvider classpathProvider = new ClasspathCqlLibraryProvider("alltypes.cql");
    TranslatingCqlLibraryProvider translatingProvider = new TranslatingCqlLibraryProvider(classpathProvider, translator);
    ColumnRuleCreator creator = new ColumnRuleCreator(jobs.getEvaluations(), translator, translatingProvider);

    ContextDefinitions contexts = jsonMapper.readValue(new File("src/test/resources/alltypes/metadata/context-definitions-related-column.json"), ContextDefinitions.class);
    ContextDefinition patientContext = contexts.getContextDefinitionByName("Patient");
    Map<String, Set<StringMatcher>> actual = creator.getDataRequirementsForContext(patientContext);

    Set<StringMatcher> expectedA = new HashSet<>();
    expectedA.add(new EqualsStringMatcher(ContextRetriever.SOURCE_FACT_IDX));
    expectedA.add(new EqualsStringMatcher("id_col"));
    expectedA.add(new EqualsStringMatcher("pat_id"));

    Set<StringMatcher> expectedB = new HashSet<>();
    expectedB.add(new EqualsStringMatcher(ContextRetriever.SOURCE_FACT_IDX));
    expectedB.add(new EqualsStringMatcher("string"));
    expectedB.add(new EqualsStringMatcher(ContextRetriever.JOIN_CONTEXT_VALUE_IDX));

    Set<StringMatcher> expectedC = new HashSet<>();
    expectedC.add(new EqualsStringMatcher(ContextRetriever.SOURCE_FACT_IDX));
    expectedC.add(new EqualsStringMatcher("pat_id"));
    expectedC.add(new EqualsStringMatcher(ContextRetriever.JOIN_CONTEXT_VALUE_IDX));

    Map<String, Set<StringMatcher>> expected = new HashMap<>();
    expected.put("A", expectedA);
    expected.put("B", expectedB);
    expected.put("C", expectedC);

    assertEquals(expected, actual);
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) TranslatingCqlLibraryProvider(com.ibm.cohort.cql.translation.TranslatingCqlLibraryProvider) HashMap(java.util.HashMap) CqlToElmTranslator(com.ibm.cohort.cql.translation.CqlToElmTranslator) ClasspathCqlLibraryProvider(com.ibm.cohort.cql.library.ClasspathCqlLibraryProvider) CqlLibraryProvider(com.ibm.cohort.cql.library.CqlLibraryProvider) TranslatingCqlLibraryProvider(com.ibm.cohort.cql.translation.TranslatingCqlLibraryProvider) EqualsStringMatcher(com.ibm.cohort.cql.util.EqualsStringMatcher) CqlEvaluationRequests(com.ibm.cohort.cql.evaluation.CqlEvaluationRequests) ClasspathCqlLibraryProvider(com.ibm.cohort.cql.library.ClasspathCqlLibraryProvider) File(java.io.File) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Example 3 with EqualsStringMatcher

use of com.ibm.cohort.cql.util.EqualsStringMatcher in project quality-measure-and-cohort-service by Alvearie.

In class ColumnRuleCreatorTest, method testGetFiltersForContext:

// Checks the requirements computed for the "Patient" context when the jobs in
// parent-child-jobs.json are combined with context-definitions.json: only data
// type A is expected, with the four matchers listed below.
@Test
public void testGetFiltersForContext() throws Exception {
    ObjectMapper jsonMapper = new ObjectMapper();

    CqlToElmTranslator translator = new CqlToElmTranslator();
    translator.registerModelInfo(new File("src/test/resources/alltypes/modelinfo/alltypes-modelinfo-1.0.0.xml"));

    CqlEvaluationRequests jobs = jsonMapper.readValue(new File("src/test/resources/alltypes/metadata/parent-child-jobs.json"), CqlEvaluationRequests.class);
    CqlLibraryProvider classpathProvider = new ClasspathCqlLibraryProvider("alltypes.cql");
    TranslatingCqlLibraryProvider translatingProvider = new TranslatingCqlLibraryProvider(classpathProvider, translator);
    ColumnRuleCreator creator = new ColumnRuleCreator(jobs.getEvaluations(), translator, translatingProvider);

    ContextDefinitions contexts = jsonMapper.readValue(new File("src/test/resources/alltypes/metadata/context-definitions.json"), ContextDefinitions.class);
    ContextDefinition patientContext = contexts.getContextDefinitionByName("Patient");
    Map<String, Set<StringMatcher>> actual = creator.getDataRequirementsForContext(patientContext);

    Set<StringMatcher> expectedA = new HashSet<>();
    expectedA.add(new EqualsStringMatcher(ContextRetriever.SOURCE_FACT_IDX));
    expectedA.add(new EqualsStringMatcher("pat_id"));
    expectedA.add(new EqualsStringMatcher("code_col"));
    expectedA.add(new EqualsStringMatcher("boolean_col"));

    Map<String, Set<StringMatcher>> expected = new HashMap<>();
    expected.put("A", expectedA);

    assertEquals(expected, actual);
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) TranslatingCqlLibraryProvider(com.ibm.cohort.cql.translation.TranslatingCqlLibraryProvider) HashMap(java.util.HashMap) CqlToElmTranslator(com.ibm.cohort.cql.translation.CqlToElmTranslator) ClasspathCqlLibraryProvider(com.ibm.cohort.cql.library.ClasspathCqlLibraryProvider) CqlLibraryProvider(com.ibm.cohort.cql.library.CqlLibraryProvider) TranslatingCqlLibraryProvider(com.ibm.cohort.cql.translation.TranslatingCqlLibraryProvider) EqualsStringMatcher(com.ibm.cohort.cql.util.EqualsStringMatcher) CqlEvaluationRequests(com.ibm.cohort.cql.evaluation.CqlEvaluationRequests) ClasspathCqlLibraryProvider(com.ibm.cohort.cql.library.ClasspathCqlLibraryProvider) File(java.io.File) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Example 4 with EqualsStringMatcher

use of com.ibm.cohort.cql.util.EqualsStringMatcher in project quality-measure-and-cohort-service by Alvearie.

In class ColumnRuleCreatorTest, method testGetFiltersForContextWithMultiJoinColumns:

// Checks the requirements computed for the "person" context of the multiple-joins
// fixtures: person, vocabulary, concept and observation are each expected with
// the matchers listed below.
@Test
public void testGetFiltersForContextWithMultiJoinColumns() throws Exception {
    ObjectMapper jsonMapper = new ObjectMapper();

    CqlToElmTranslator translator = new CqlToElmTranslator();
    translator.registerModelInfo(new File("src/test/resources/multiple-joins/modelinfo/omop-modelinfo-5.2.2.xml"));

    CqlEvaluationRequests jobs = jsonMapper.readValue(new File("src/test/resources/multiple-joins/metadata/cql-jobs.json"), CqlEvaluationRequests.class);
    TranslatingCqlLibraryProvider translatingProvider = new TranslatingCqlLibraryProvider(new ClasspathCqlLibraryProvider("multiple-joins.cql"), translator);
    ColumnRuleCreator creator = new ColumnRuleCreator(jobs.getEvaluations(), translator, translatingProvider);

    ContextDefinitions contexts = jsonMapper.readValue(new File("src/test/resources/multiple-joins/metadata/context-definitions.json"), ContextDefinitions.class);
    ContextDefinition personContext = contexts.getContextDefinitionByName("person");
    Map<String, Set<StringMatcher>> actual = creator.getDataRequirementsForContext(personContext);

    Set<StringMatcher> expectedPerson = new HashSet<>();
    expectedPerson.add(new EqualsStringMatcher(ContextRetriever.SOURCE_FACT_IDX));
    expectedPerson.add(new EqualsStringMatcher("person_id"));

    Set<StringMatcher> expectedVocabulary = new HashSet<>();
    expectedVocabulary.add(new EqualsStringMatcher(ContextRetriever.SOURCE_FACT_IDX));
    expectedVocabulary.add(new EqualsStringMatcher("vocabulary_id"));
    expectedVocabulary.add(new EqualsStringMatcher("vocabulary_version"));
    expectedVocabulary.add(new EqualsStringMatcher(ContextRetriever.JOIN_CONTEXT_VALUE_IDX));

    Set<StringMatcher> expectedConcept = new HashSet<>();
    expectedConcept.add(new EqualsStringMatcher(ContextRetriever.SOURCE_FACT_IDX));
    expectedConcept.add(new EqualsStringMatcher("concept_id"));
    expectedConcept.add(new EqualsStringMatcher("concept_code"));
    expectedConcept.add(new EqualsStringMatcher("concept_name"));
    expectedConcept.add(new EqualsStringMatcher("vocabulary_id"));
    expectedConcept.add(new EqualsStringMatcher(ContextRetriever.JOIN_CONTEXT_VALUE_IDX));

    Set<StringMatcher> expectedObservation = new HashSet<>();
    expectedObservation.add(new EqualsStringMatcher(ContextRetriever.SOURCE_FACT_IDX));
    expectedObservation.add(new EqualsStringMatcher("observation_concept_id"));
    expectedObservation.add(new EqualsStringMatcher("person_id"));
    expectedObservation.add(new EqualsStringMatcher(ContextRetriever.JOIN_CONTEXT_VALUE_IDX));

    Map<String, Set<StringMatcher>> expected = new HashMap<>();
    expected.put("person", expectedPerson);
    expected.put("vocabulary", expectedVocabulary);
    expected.put("concept", expectedConcept);
    expected.put("observation", expectedObservation);

    assertEquals(expected, actual);
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) TranslatingCqlLibraryProvider(com.ibm.cohort.cql.translation.TranslatingCqlLibraryProvider) HashMap(java.util.HashMap) CqlToElmTranslator(com.ibm.cohort.cql.translation.CqlToElmTranslator) EqualsStringMatcher(com.ibm.cohort.cql.util.EqualsStringMatcher) CqlEvaluationRequests(com.ibm.cohort.cql.evaluation.CqlEvaluationRequests) ClasspathCqlLibraryProvider(com.ibm.cohort.cql.library.ClasspathCqlLibraryProvider) File(java.io.File) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Example 5 with EqualsStringMatcher

use of com.ibm.cohort.cql.util.EqualsStringMatcher in project quality-measure-and-cohort-service by Alvearie.

In class ColumnFilterFunctionTest, method testColumnFiltering:

// Loads the test-A parquet fixture (asserted to have 12 fields), applies a
// ColumnFilterFunction built from one exact-name matcher and one regex matcher,
// and compares the surviving column names with the expected set.
@Test
public void testColumnFiltering() {
    String parquetUri = new File("src/test/resources/alltypes/testdata/test-A.parquet").toURI().toString();
    Dataset<Row> unfiltered = spark.read().format("parquet").load(parquetUri);
    assertEquals(12, unfiltered.schema().fields().length);

    String exactName = "boolean_col";
    String namePattern = "code_col[0-9]*";
    ColumnFilterFunction columnFilter = new ColumnFilterFunction(new HashSet<>(Arrays.asList(new EqualsStringMatcher(exactName), new RegexStringMatcher(namePattern))));
    Dataset<Row> filtered = columnFilter.apply(unfiltered);

    Set<String> expectedNames = new HashSet<>();
    expectedNames.add(exactName);
    expectedNames.add("code_col");
    expectedNames.add("code_col_system");
    expectedNames.add("string_col");
    expectedNames.add("code_col2");

    String[] survivingColumns = filtered.schema().fieldNames();
    Set<String> actualNames = new HashSet<>(Arrays.asList(survivingColumns));
    assertEquals(expectedNames, actualNames);
}
Also used : RegexStringMatcher(com.ibm.cohort.cql.util.RegexStringMatcher) EqualsStringMatcher(com.ibm.cohort.cql.util.EqualsStringMatcher) Row(org.apache.spark.sql.Row) File(java.io.File) HashSet(java.util.HashSet) Test(org.junit.Test) BaseSparkTest(com.ibm.cohort.cql.spark.BaseSparkTest)

Aggregations

EqualsStringMatcher (com.ibm.cohort.cql.util.EqualsStringMatcher): 5; HashSet (java.util.HashSet): 5; CqlToElmTranslator (com.ibm.cohort.cql.translation.CqlToElmTranslator): 4; File (java.io.File): 4; HashMap (java.util.HashMap): 4; Set (java.util.Set): 4; Test (org.junit.Test): 4; ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 3; CqlEvaluationRequests (com.ibm.cohort.cql.evaluation.CqlEvaluationRequests): 3; ClasspathCqlLibraryProvider (com.ibm.cohort.cql.library.ClasspathCqlLibraryProvider): 3; CqlLibraryProvider (com.ibm.cohort.cql.library.CqlLibraryProvider): 3; TranslatingCqlLibraryProvider (com.ibm.cohort.cql.translation.TranslatingCqlLibraryProvider): 3; CqlEvaluationRequest (com.ibm.cohort.cql.evaluation.CqlEvaluationRequest): 1; CqlLibraryDescriptor (com.ibm.cohort.cql.library.CqlLibraryDescriptor): 1; BaseSparkTest (com.ibm.cohort.cql.spark.BaseSparkTest): 1; DataTypeRequirementsProcessor (com.ibm.cohort.cql.spark.optimizer.DataTypeRequirementsProcessor): 1; RegexStringMatcher (com.ibm.cohort.cql.util.RegexStringMatcher): 1; StringMatcher (com.ibm.cohort.cql.util.StringMatcher): 1; List (java.util.List): 1; Map (java.util.Map): 1