Examples with StringMatcher - com.ibm.cohort.cql.util.StringMatcher

Example 1 with StringMatcher

Use of com.ibm.cohort.cql.util.StringMatcher in the project quality-measure-and-cohort-service by Alvearie.

From the class AnyColumnFunctions, method AnyColumnRegex.

/**
 * Collects the values of every field of a data row whose field name is accepted by a
 * {@link RegexStringMatcher} built from the supplied expression.
 *
 * @param object the row to read from; it is cast to {@link DataRow} without a prior type check
 * @param regex expression handed to the {@link RegexStringMatcher} constructor
 * @return a list holding the value of each field whose name passed the matcher
 * @throws ClassCastException if {@code object} is not a {@link DataRow}
 */
public static Object AnyColumnRegex(Object object, String regex) {
    final DataRow row = (DataRow) object;
    final StringMatcher fieldNameFilter = new RegexStringMatcher(regex);
    return row.getFieldNames()
            .stream()
            .filter(fieldNameFilter)
            .map(fieldName -> row.getValue(fieldName))
            .collect(Collectors.toList());
}

Also used : RegexStringMatcher(com.ibm.cohort.cql.util.RegexStringMatcher) StringMatcher(com.ibm.cohort.cql.util.StringMatcher) RegexStringMatcher(com.ibm.cohort.cql.util.RegexStringMatcher) PrefixStringMatcher(com.ibm.cohort.cql.util.PrefixStringMatcher) DataRow(com.ibm.cohort.datarow.model.DataRow)

Example 2 with StringMatcher

Use of com.ibm.cohort.cql.util.StringMatcher in the project quality-measure-and-cohort-service by Alvearie.

From the class AnyColumnFunctions, method AnyColumn.

/**
 * Collects the values of every field of a data row whose field name is accepted by a
 * {@link PrefixStringMatcher} built from the supplied prefix.
 *
 * @param object the row to read from; it is cast to {@link DataRow} without a prior type check
 * @param fieldPrefix prefix handed to the {@link PrefixStringMatcher} constructor
 * @return a list holding the value of each field whose name passed the matcher
 * @throws ClassCastException if {@code object} is not a {@link DataRow}
 */
public static Object AnyColumn(Object object, String fieldPrefix) {
    final DataRow row = (DataRow) object;
    final StringMatcher fieldNameFilter = new PrefixStringMatcher(fieldPrefix);
    return row.getFieldNames()
            .stream()
            .filter(fieldNameFilter)
            .map(fieldName -> row.getValue(fieldName))
            .collect(Collectors.toList());
}

Also used : StringMatcher(com.ibm.cohort.cql.util.StringMatcher) RegexStringMatcher(com.ibm.cohort.cql.util.RegexStringMatcher) PrefixStringMatcher(com.ibm.cohort.cql.util.PrefixStringMatcher) PrefixStringMatcher(com.ibm.cohort.cql.util.PrefixStringMatcher) DataRow(com.ibm.cohort.datarow.model.DataRow)

Example 3 with StringMatcher

Use of com.ibm.cohort.cql.util.StringMatcher in the project quality-measure-and-cohort-service by Alvearie.

From the class ColumnRuleCreator, method getDataRequirementsForContext.

/**
 * Build the combined column requirements, keyed by data type, for every CQL job
 * evaluated under the given aggregation context.
 *
 * <p>The requirements reported for each library are merged per data type, and then the
 * key columns needed by the context itself are added: the primary key column of the
 * primary data type, the join columns of each relationship whose related data type is
 * already present in the result, and the association / chained "with" columns for
 * many-to-many style joins. Finally, a matcher for
 * {@code ContextRetriever.SOURCE_FACT_IDX} is added to every data type in the result.
 *
 * @param context ContextDefinition whose CQL jobs will be interrogated for data requirements
 * @return Map of data type to the matchers for the fields in that data type that are
 *         used by the CQL jobs or required by the context definition
 */
public Map<String, Set<StringMatcher>> getDataRequirementsForContext(ContextDefinition context) {
    // Group the requested expression names by the library that defines them.
    Map<CqlLibraryDescriptor, Set<String>> expressionNamesByLibrary = new HashMap<>();
    for (CqlEvaluationRequest evaluationRequest : requests) {
        Set<String> expressionNames = expressionNamesByLibrary.computeIfAbsent(evaluationRequest.getDescriptor(), descriptor -> new HashSet<>());
        evaluationRequest.getExpressions().stream()
                .map(expression -> expression.getName())
                .forEach(expressionNames::add);
    }

    // Ask the requirements processor about each library and fold the answers together.
    DataTypeRequirementsProcessor processor = new DataTypeRequirementsProcessor(cqlTranslator);
    Map<String, Set<StringMatcher>> matchersByDataType = new HashMap<>();
    for (Map.Entry<CqlLibraryDescriptor, Set<String>> libraryEntry : expressionNamesByLibrary.entrySet()) {
        CqlLibraryDescriptor library = libraryEntry.getKey();
        LOG.debug("Extracting data requirements for {}", library);
        DataTypeRequirementsProcessor.DataTypeRequirements libraryRequirements = processor.getDataRequirements(libraryProvider, library, libraryEntry.getValue());
        for (Map.Entry<String, Set<StringMatcher>> pathEntry : libraryRequirements.allAsStringMatcher().entrySet()) {
            matchersByDataType.merge(pathEntry.getKey(), pathEntry.getValue(), (existing, incoming) -> {
                existing.addAll(incoming);
                return existing;
            });
        }
    }

    // The primary key of the context's primary data type is always required.
    Set<StringMatcher> primaryFields = matchersByDataType.computeIfAbsent(context.getPrimaryDataType(), dataType -> new HashSet<>());
    primaryFields.add(new EqualsStringMatcher(context.getPrimaryKeyColumn()));

    if (context.getRelationships() != null) {
        for (Join relationship : context.getRelationships()) {
            Set<StringMatcher> relatedTypeFields = matchersByDataType.get(relationship.getRelatedDataType());
            if (relatedTypeFields == null) {
                // Related data type is absent from the merged requirements; nothing to add for this join.
                continue;
            }

            relatedTypeFields.add(new EqualsStringMatcher(relationship.getRelatedKeyColumn()));
            relatedTypeFields.add(new EqualsStringMatcher(ContextRetriever.JOIN_CONTEXT_VALUE_IDX));

            // if the join key is not the primary key of the primary data table, then we need to add in the alternate key
            if (relationship.getPrimaryDataTypeColumn() != null) {
                primaryFields.add(new EqualsStringMatcher(relationship.getPrimaryDataTypeColumn()));
            }

            if (relationship instanceof ManyToMany) {
                ManyToMany association = (ManyToMany) relationship;
                Set<StringMatcher> associationTypeFields = matchersByDataType.computeIfAbsent(association.getAssociationDataType(), dataType -> new HashSet<>());
                associationTypeFields.add(new EqualsStringMatcher(association.getAssociationOneKeyColumn()));
                associationTypeFields.add(new EqualsStringMatcher(association.getAssociationManyKeyColumn()));
            }

            if (relationship instanceof MultiManyToMany) {
                // Walk the chain of "with" joins until a link has no further "with".
                ManyToMany chained = ((MultiManyToMany) relationship).getWith();
                while (chained != null) {
                    Set<StringMatcher> chainedTypeFields = matchersByDataType.computeIfAbsent(chained.getRelatedDataType(), dataType -> new HashSet<>());
                    chainedTypeFields.add(new EqualsStringMatcher(chained.getRelatedKeyColumn()));
                    chainedTypeFields.add(new EqualsStringMatcher(ContextRetriever.JOIN_CONTEXT_VALUE_IDX));
                    if (chained instanceof MultiManyToMany) {
                        chained = ((MultiManyToMany) chained).getWith();
                    } else {
                        chained = null;
                    }
                }
            }
        }
    }

    // Every data type in the result also gets the source fact index column.
    for (Set<StringMatcher> matchers : matchersByDataType.values()) {
        matchers.add(new EqualsStringMatcher(ContextRetriever.SOURCE_FACT_IDX));
    }
    return matchersByDataType;
}

Also used : EqualsStringMatcher(com.ibm.cohort.cql.util.EqualsStringMatcher) Logger(org.slf4j.Logger) CqlToElmTranslator(com.ibm.cohort.cql.translation.CqlToElmTranslator) DataTypeRequirementsProcessor(com.ibm.cohort.cql.spark.optimizer.DataTypeRequirementsProcessor) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) CqlLibraryProvider(com.ibm.cohort.cql.library.CqlLibraryProvider) HashMap(java.util.HashMap) CqlLibraryDescriptor(com.ibm.cohort.cql.library.CqlLibraryDescriptor) HashSet(java.util.HashSet) StringMatcher(com.ibm.cohort.cql.util.StringMatcher) List(java.util.List) Map(java.util.Map) CqlEvaluationRequest(com.ibm.cohort.cql.evaluation.CqlEvaluationRequest) Set(java.util.Set) HashSet(java.util.HashSet) DataTypeRequirementsProcessor(com.ibm.cohort.cql.spark.optimizer.DataTypeRequirementsProcessor) HashMap(java.util.HashMap) CqlEvaluationRequest(com.ibm.cohort.cql.evaluation.CqlEvaluationRequest) EqualsStringMatcher(com.ibm.cohort.cql.util.EqualsStringMatcher) StringMatcher(com.ibm.cohort.cql.util.StringMatcher) EqualsStringMatcher(com.ibm.cohort.cql.util.EqualsStringMatcher) CqlLibraryDescriptor(com.ibm.cohort.cql.library.CqlLibraryDescriptor) HashMap(java.util.HashMap) Map(java.util.Map)

Example 4 with StringMatcher

Use of com.ibm.cohort.cql.util.StringMatcher in the project quality-measure-and-cohort-service by Alvearie.

From the class ColumnFilterFunction, method apply.

/**
 * Filter the columns in a dataset based on a set of matching rules provided at class initialization.
 *
 * @param input Dataset to be filtered
 * @return Dataset filtered to the columns matching one or more string matchers used to initialize
 *         this class. If a code column is included in the output, any columns associated with the
 *         code column through metadata fields will also be included. Returns {@code null} when
 *         no column name matchers are configured.
 */
@Override
public Dataset<Row> apply(Dataset<Row> input) {
    Dataset<Row> result = null;
    if (CollectionUtils.isNotEmpty(columnNameMatchers)) {
        Dataset<Row> sourceDataset = input;
        List<Column> cols = new ArrayList<>();
        for (StringMatcher colNameMatcher : columnNameMatchers) {
            try {
                Stream.of(sourceDataset.schema().fieldNames()).filter(fn -> colNameMatcher.test(fn)).map(fn -> sourceDataset.col(fn)).forEach(col -> {
                    cols.add(col);
                    // NOTE(review): the metadata lookup keys on col.toString(); this presumably
                    // equals the schema field name - confirm for names needing quoting.
                    Metadata metadata = MetadataUtils.getColumnMetadata(sourceDataset.schema(), col.toString());
                    if (metadata != null) {
                        if (MetadataUtils.isCodeCol(metadata)) {
                            // Carry along the companion columns that the code column's metadata points to.
                            String systemCol = MetadataUtils.getSystemCol(metadata);
                            if (systemCol != null) {
                                cols.add(sourceDataset.col(systemCol));
                            }
                            String displayCol = MetadataUtils.getDisplayCol(metadata);
                            if (displayCol != null) {
                                cols.add(sourceDataset.col(displayCol));
                            }
                        }
                    }
                });
            } catch (Throwable th) {
                // SLF4J substitutes "{}" placeholders (not printf-style "%s"); a trailing
                // Throwable argument is logged as the exception with its stack trace.
                LOG.error("Failed to resolve columns for matcher {}", colNameMatcher, th);
                throw th;
            }
        }
        result = sourceDataset.select(cols.toArray(new Column[0]));
    }
    return result;
}

Also used : Metadata(org.apache.spark.sql.types.Metadata) Logger(org.slf4j.Logger) Dataset(org.apache.spark.sql.Dataset) Column(org.apache.spark.sql.Column) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) UnaryOperator(java.util.function.UnaryOperator) Row(org.apache.spark.sql.Row) ArrayList(java.util.ArrayList) StringMatcher(com.ibm.cohort.cql.util.StringMatcher) List(java.util.List) Stream(java.util.stream.Stream) CollectionUtils(org.apache.commons.collections.CollectionUtils) Column(org.apache.spark.sql.Column) ArrayList(java.util.ArrayList) Metadata(org.apache.spark.sql.types.Metadata) StringMatcher(com.ibm.cohort.cql.util.StringMatcher) Row(org.apache.spark.sql.Row)

Example 5 with StringMatcher

Use of com.ibm.cohort.cql.util.StringMatcher in the project quality-measure-and-cohort-service by Alvearie.

From the class AnyColumnVisitor, method visitFunctionRef.

/**
 * Inspects a function reference and, when it names one of the AnyColumn functions,
 * reports the referenced data type together with the column matcher implied by the
 * call to the supplied context. Traversal then continues through the superclass.
 *
 * @param elm function reference being visited
 * @param context collector that receives the (data type, matcher) pair
 * @return whatever the superclass visit of {@code elm} returns
 * @throws IllegalArgumentException if an AnyColumn function does not have exactly two
 *         operands, if its second operand is not a literal, or if the function name is
 *         declared in {@code FUNCTION_NAMES} but not handled here
 */
@Override
public Object visitFunctionRef(FunctionRef elm, AnyColumnContext context) {
    if (AnyColumnFunctions.FUNCTION_NAMES.contains(elm.getName())) {
        if (elm.getOperand().size() != 2) {
            throw new IllegalArgumentException(String.format("%s function at %s should have exactly two arguments", elm.getName(), elm.getLocator()));
        }

        QName dataType = ((As) elm.getOperand().get(0)).getOperand().getResultTypeName();
        // TODO - validate that the first operand is a model object. We really should be doing that at the
        // method declaration level instead of Choice<Any>, but that will require the model
        // to have a base class that everything extends from.

        if (!(elm.getOperand().get(1) instanceof Literal)) {
            throw new IllegalArgumentException(String.format("Second argument to %s function at %s must be a literal", elm.getName(), elm.getLocator()));
        }
        String columnMatchLogic = ((Literal) elm.getOperand().get(1)).getValue();

        // Pick the matcher flavour that corresponds to the specific AnyColumn function.
        final StringMatcher matcher;
        if (elm.getName().equals(AnyColumnFunctions.FUNC_ANY_COLUMN)) {
            matcher = new PrefixStringMatcher(columnMatchLogic);
        } else if (elm.getName().equals(AnyColumnFunctions.FUNC_ANY_COLUMN_REGEX)) {
            matcher = new RegexStringMatcher(columnMatchLogic);
        } else {
            throw new IllegalArgumentException(String.format("Found declared, but unsupported AnyColumn function %s at %s", elm.getName(), elm.getLocator()));
        }

        context.reportAnyColumn(dataType, matcher);
    }
    return super.visitFunctionRef(elm, context);
}

Also used : RegexStringMatcher(com.ibm.cohort.cql.util.RegexStringMatcher) QName(javax.xml.namespace.QName) Literal(org.hl7.elm.r1.Literal) StringMatcher(com.ibm.cohort.cql.util.StringMatcher) RegexStringMatcher(com.ibm.cohort.cql.util.RegexStringMatcher) PrefixStringMatcher(com.ibm.cohort.cql.util.PrefixStringMatcher) PrefixStringMatcher(com.ibm.cohort.cql.util.PrefixStringMatcher)

Aggregations

StringMatcher (com.ibm.cohort.cql.util.StringMatcher): 5, PrefixStringMatcher (com.ibm.cohort.cql.util.PrefixStringMatcher): 3, RegexStringMatcher (com.ibm.cohort.cql.util.RegexStringMatcher): 3, DataRow (com.ibm.cohort.datarow.model.DataRow): 2, List (java.util.List): 2, Set (java.util.Set): 2, Logger (org.slf4j.Logger): 2, LoggerFactory (org.slf4j.LoggerFactory): 2, CqlEvaluationRequest (com.ibm.cohort.cql.evaluation.CqlEvaluationRequest): 1, CqlLibraryDescriptor (com.ibm.cohort.cql.library.CqlLibraryDescriptor): 1, CqlLibraryProvider (com.ibm.cohort.cql.library.CqlLibraryProvider): 1, DataTypeRequirementsProcessor (com.ibm.cohort.cql.spark.optimizer.DataTypeRequirementsProcessor): 1, CqlToElmTranslator (com.ibm.cohort.cql.translation.CqlToElmTranslator): 1, EqualsStringMatcher (com.ibm.cohort.cql.util.EqualsStringMatcher): 1, ArrayList (java.util.ArrayList): 1, HashMap (java.util.HashMap): 1, HashSet (java.util.HashSet): 1, Map (java.util.Map): 1, UnaryOperator (java.util.function.UnaryOperator): 1, Stream (java.util.stream.Stream): 1