Search in sources :

Example 1 with ValueGroup

use of gov.cms.bfd.model.codebook.model.ValueGroup in project beneficiary-fhir-data by CMSgov.

the class SupportedCodebookTest method findDuplicateCodes.

/**
 * Runs {@link gov.cms.bfd.model.codebook.extractor.PdfParser} over every {@link
 * gov.cms.bfd.model.codebook.extractor.SupportedCodebook} and reports each {@link Value#getCode()}
 * that occurs more than once within a single {@link Variable}.
 *
 * <p>Duplicates are only logged as warnings; this method makes no assertions about them.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void findDuplicateCodes() throws IOException {
    for (SupportedCodebook supportedCodebook : SupportedCodebook.values()) {
        Codebook codebook = PdfParser.parseCodebookPdf(supportedCodebook);
        for (Variable variable : codebook.getVariables()) {
            // Variables without value groups have no codes to compare.
            if (variable.getValueGroups().isPresent()) {
                // Group every Value under its code, keeping first-seen order of the codes.
                Map<String, List<Value>> codeToValues = new LinkedHashMap<>();
                for (ValueGroup group : variable.getValueGroups().get()) {
                    for (Value value : group.getValues()) {
                        codeToValues
                            .computeIfAbsent(value.getCode(), unused -> new ArrayList<>())
                            .add(value);
                    }
                }
                // Any code that maps to more than one Value is a duplicate: warn about it.
                for (Map.Entry<String, List<Value>> entry : codeToValues.entrySet()) {
                    List<Value> sharedValues = entry.getValue();
                    if (sharedValues.size() > 1) {
                        LOGGER.warn("The code '{}' appears more than once in Variable '{}': {}.", entry.getKey(), variable, sharedValues);
                    }
                }
            }
        }
    }
}
Also used : Codebook(gov.cms.bfd.model.codebook.model.Codebook) Variable(gov.cms.bfd.model.codebook.model.Variable) ValueGroup(gov.cms.bfd.model.codebook.model.ValueGroup) Value(gov.cms.bfd.model.codebook.model.Value) ArrayList(java.util.ArrayList) List(java.util.List) LinkedHashMap(java.util.LinkedHashMap) Test(org.junit.jupiter.api.Test)

Example 2 with ValueGroup

use of gov.cms.bfd.model.codebook.model.ValueGroup in project beneficiary-fhir-data by CMSgov.

the class PdfParser method parseValueGroups.

/**
 * Extracts the coded value groups from the raw text lines of a single variable section.
 *
 * @param variableSection the raw text lines of the variable section to parse
 * @return the parsed {@link ValueGroup}s backing {@link Variable#getValueGroups()}, or
 *     <code>null</code> if no line of the section is a coded ("XX = YY") value line
 * @throws IllegalStateException if no content could be extracted for the values field
 * @throws BadCodeMonkeyException if value group description lines are still pending after all
 *     field lines have been consumed
 */
private static List<ValueGroup> parseValueGroups(List<String> variableSection) {
    /*
     * Strategy: a Variable carries EITHER a valueFormat OR valueGroups. If at least
     * one "XX = YY" line is present we treat the field as valueGroups; without any
     * such code line it is a valueFormat and there is nothing for this method to
     * return.
     */
    String variableId = parseId(variableSection);
    List<String> fieldLines = extractFieldContent(variableSection, FIELD_NAME_VALUES, FIELD_NAME_VALUES_ALT1);
    if (fieldLines == null) {
        throw new IllegalStateException(String.format("Invalid '%s' field in variable section: %s", FIELD_NAME_VALUES, variableSection));
    }

    // Look for an "XX = YY" coded value line. Every line is tested (no early exit).
    boolean hasCodedValue = false;
    for (String sectionLine : variableSection) {
        hasCodedValue |= PATTERN_VALUE_LINE_WITH_CODE.matcher(sectionLine).matches();
    }
    if (!hasCodedValue) {
        return null;
    }

    /*
     * From here on we are dealing with coded values. Walk the field lines while
     * tracking three pieces of in-progress state: the group being built, the
     * description lines gathered for it, and the lines of the value being read.
     */
    List<ValueGroup> parsedGroups = new ArrayList<>();
    ValueGroup groupInProgress = new ValueGroup();
    List<String> pendingDescriptionLines = new ArrayList<>();
    List<String> pendingValueLines = new ArrayList<>();
    for (int i = 0; i < fieldLines.size(); i++) {
        String line = fieldLines.get(i);

        if (!isValueGroupDescription(variableId, fieldLines, i)) {
            // This line belongs to a Value.
            boolean startsNewValue = PATTERN_VALUE_LINE_WITH_CODE.matcher(line).matches();
            if (!pendingDescriptionLines.isEmpty()) {
                // The group's description just ended: attach it to the group.
                groupInProgress.setDescription(extractParagraphs(pendingDescriptionLines));
                pendingDescriptionLines = new ArrayList<>();
            }
            if (startsNewValue && !pendingValueLines.isEmpty()) {
                // A new Value begins here, so the previous one is complete.
                Value finishedValue = parseValue(pendingValueLines);
                groupInProgress.getValues().add(finishedValue);
                pendingValueLines = new ArrayList<>();
            }
            // The line is always kept as part of the current Value.
            pendingValueLines.add(line);
            continue;
        }

        // This line belongs to a ValueGroup description.
        if (!pendingValueLines.isEmpty()) {
            // A description after value lines closes both the Value and its group.
            Value finishedValue = parseValue(pendingValueLines);
            groupInProgress.getValues().add(finishedValue);
            pendingValueLines = new ArrayList<>();
            parsedGroups.add(groupInProgress);
            groupInProgress = new ValueGroup();
        }
        // The line is always kept as part of the current description.
        pendingDescriptionLines.add(line);
    }

    if (!pendingValueLines.isEmpty()) {
        // Out of lines: flush the last Value and the group that owns it.
        Value finishedValue = parseValue(pendingValueLines);
        groupInProgress.getValues().add(finishedValue);
        parsedGroups.add(groupInProgress);
    }

    // Sanity check: description lines must not be left over at the end.
    if (!pendingDescriptionLines.isEmpty()) {
        throw new BadCodeMonkeyException();
    }
    return parsedGroups;
}
Also used : BadCodeMonkeyException(gov.cms.bfd.sharedutils.exceptions.BadCodeMonkeyException) ValueGroup(gov.cms.bfd.model.codebook.model.ValueGroup) ArrayList(java.util.ArrayList) Value(gov.cms.bfd.model.codebook.model.Value)

Aggregations

Value (gov.cms.bfd.model.codebook.model.Value)2 ValueGroup (gov.cms.bfd.model.codebook.model.ValueGroup)2 ArrayList (java.util.ArrayList)2 Codebook (gov.cms.bfd.model.codebook.model.Codebook)1 Variable (gov.cms.bfd.model.codebook.model.Variable)1 BadCodeMonkeyException (gov.cms.bfd.sharedutils.exceptions.BadCodeMonkeyException)1 LinkedHashMap (java.util.LinkedHashMap)1 List (java.util.List)1 Test (org.junit.jupiter.api.Test)1