Search in sources :

Example 1 with Value

use of gov.cms.bfd.model.codebook.model.Value in project beneficiary-fhir-data by CMSgov.

the class PdfParserTest method parseCodebookPdf_CARR_LINE_PRVDR_TYPE_CD.

/**
 * Tests {@link
 * gov.cms.bfd.model.codebook.extractor.PdfParser#parseCodebookPdf(SupportedCodebook)} against
 * {@link gov.cms.bfd.model.codebook.extractor.SupportedCodebook#FFS_CLAIMS} for the <code>
 * CARR_LINE_PRVDR_TYPE_CD</code> variable, spot-checking its label, description, names, type,
 * length, source, value groups and a couple of individual values.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void parseCodebookPdf_CARR_LINE_PRVDR_TYPE_CD() throws IOException {
    /*
     * Why are we spot checking this one variable's parsed output? Because it's
     * somewhat interesting: 1) it has multiple valueGroups, 2) many of its values
     * have multiple lines, 3) many of its coded values are duplicated, 4) it has a
     * COMMENT that's just "-".
     */
    Codebook codebook = PdfParser.parseCodebookPdf(SupportedCodebook.FFS_CLAIMS);
    // Fail with a readable message (rather than a bare NoSuchElementException) if the variable
    // is missing from the parsed codebook.
    Variable variable = codebook.getVariables().stream()
        .filter(v -> v.getId().equals("CARR_LINE_PRVDR_TYPE_CD"))
        .findAny()
        .orElseThrow(() -> new AssertionError("Variable not found in parsed codebook: CARR_LINE_PRVDR_TYPE_CD"));
    String expectedDescription1 = "Code identifying the type of provider furnishing the service for this line" + " item on the carrier claim.";

    // Top-level fields of the variable.
    assertEquals("Carrier Line Provider Type Code", variable.getLabel());
    assertParagraphsEquals(Arrays.asList(expectedDescription1), variable.getDescription());
    assertEquals("PRV_TYPE", variable.getShortName().get());
    assertEquals("CARR_LINE_PRVDR_TYPE_CD", variable.getLongName());
    assertEquals(VariableType.CHAR, variable.getType().get());
    // Integer.valueOf(...) replaces the deprecated Integer(int) constructor.
    assertEquals(Integer.valueOf(1), variable.getLength());
    assertEquals("NCH", variable.getSource().get());
    assertFalse(variable.getValueFormat().isPresent());

    // The two value groups, their sizes and their descriptions.
    assertEquals(2, variable.getValueGroups().get().size());
    assertEquals(8, variable.getValueGroups().get().get(0).getValues().size());
    assertParagraphsEquals(Arrays.asList("For Physician/Supplier Claims:"), variable.getValueGroups().get().get(0).getDescription());
    assertEquals(9, variable.getValueGroups().get().get(1).getValues().size());
    assertParagraphsEquals(Arrays.asList("NOTE: PRIOR TO VERSION H, DME claims also used this code; the" + " following were valid code VALUES:"), variable.getValueGroups().get().get(1).getDescription());

    // Spot-check some of the values:
    Value value_0_3 = variable.getValueGroups().get().get(0).getValues().get(3);
    assertEquals("3", value_0_3.getCode());
    assertEquals("Institutional provider", value_0_3.getDescription());
    Value value_1_8 = variable.getValueGroups().get().get(1).getValues().get(8);
    assertEquals("8", value_1_8.getCode());
    assertEquals("Other entities for whom EI numbers are used in coding the ID field or proprietorship" + " for whom EI numbers are used in coding the ID field.", value_1_8.getDescription());

    // The COMMENT field is just "-" in the codebook, which is expected to parse as "absent".
    assertFalse(variable.getComment().isPresent());
}
Also used : Codebook(gov.cms.bfd.model.codebook.model.Codebook) Variable(gov.cms.bfd.model.codebook.model.Variable) Value(gov.cms.bfd.model.codebook.model.Value) Test(org.junit.jupiter.api.Test)

Example 2 with Value

use of gov.cms.bfd.model.codebook.model.Value in project beneficiary-fhir-data by CMSgov.

the class PdfParserTest method findVariableSections.

/**
 * Exercises {@link gov.cms.bfd.model.codebook.extractor.PdfParser#findVariableSections(List)}
 * for every {@link gov.cms.bfd.model.codebook.extractor.SupportedCodebook}: each section found
 * must be non-null and at least 10 lines long, and every <code>SHORT_NAME:</code> line in the raw
 * text must show up in some section.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void findVariableSections() throws IOException {
    for (SupportedCodebook codebookUnderTest : SupportedCodebook.values()) {
        try (InputStream pdfStream = codebookUnderTest.getCodebookPdfInputStream()) {
            LOGGER.info("Looking for sections in codebook: {}", codebookUnderTest.name());

            /*
             * Note: the printXXX(...) debugging helpers stay disabled unless/until they're
             * needed to chase a specific problem, as they add a ton of log noise.
             */
            List<String> allTextLines = PdfParser.extractTextLinesFromPdf(pdfStream);
            // printTextLinesToConsole(allTextLines);
            List<List<String>> sections = PdfParser.findVariableSections(allTextLines);

            // Basic sanity: every section exists and has a plausible minimum size.
            for (List<String> section : sections) {
                assertNotNull(section);
                assertTrue(section.size() >= 10);
            }

            /*
             * Stronger check of the section splitting: take a one-line field that should be
             * unique per section, collect every occurrence from the un-grouped lines, and
             * require that each occurrence can be located inside one of the sections.
             */
            List<String> shortNameLines = allTextLines.stream()
                .filter(textLine -> textLine.startsWith("SHORT_NAME:"))
                .collect(Collectors.toList());

            // If this fails, we need to pick a different search field.
            assertEquals(shortNameLines.size(), new HashSet<>(shortNameLines).size(), "Not all instances of that field are unique.");

            for (String shortNameLine : shortNameLines) {
                boolean presentInSomeSection = sections.stream()
                    .anyMatch(section -> section.contains(shortNameLine));
                assertTrue(presentInSomeSection, String.format("Can't find search field line: '%s'", shortNameLine));
            }
        }
    }
}
Also used : Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) Arrays(java.util.Arrays) Logger(org.slf4j.Logger) Files(java.nio.file.Files) Predicate(java.util.function.Predicate) Variable(gov.cms.bfd.model.codebook.model.Variable) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) FileWriter(java.io.FileWriter) LoggerFactory(org.slf4j.LoggerFactory) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Codebook(gov.cms.bfd.model.codebook.model.Codebook) UncheckedIOException(java.io.UncheckedIOException) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test) List(java.util.List) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) VariableType(gov.cms.bfd.model.codebook.model.VariableType) Optional(java.util.Optional) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) Value(gov.cms.bfd.model.codebook.model.Value) Path(java.nio.file.Path) InputStream(java.io.InputStream) InputStream(java.io.InputStream) List(java.util.List) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test)

Example 3 with Value

use of gov.cms.bfd.model.codebook.model.Value in project beneficiary-fhir-data by CMSgov.

the class SupportedCodebookTest method findDuplicateCodes.

/**
 * Runs {@link gov.cms.bfd.model.codebook.extractor.PdfParser} over every {@link
 * gov.cms.bfd.model.codebook.extractor.SupportedCodebook} and logs a warning for each {@link
 * Value#getCode()} that occurs more than once within a single {@link Variable}. Nothing is
 * asserted; duplicates are only reported.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void findDuplicateCodes() throws IOException {
    for (SupportedCodebook supportedCodebook : SupportedCodebook.values()) {
        Codebook parsedCodebook = PdfParser.parseCodebookPdf(supportedCodebook);
        for (Variable variable : parsedCodebook.getVariables()) {
            // Only variables that actually carry coded values are of interest.
            if (variable.getValueGroups().isPresent()) {
                // Group every Value under its code, keeping first-seen order of the codes.
                Map<String, List<Value>> grouped = new LinkedHashMap<>();
                for (ValueGroup group : variable.getValueGroups().get()) {
                    for (Value candidate : group.getValues()) {
                        grouped.computeIfAbsent(candidate.getCode(), unused -> new ArrayList<>()).add(candidate);
                    }
                }

                // Any code that maps to more than one Value is a duplicate: warn about it.
                for (Map.Entry<String, List<Value>> codeAndValues : grouped.entrySet()) {
                    if (codeAndValues.getValue().size() > 1) {
                        LOGGER.warn("The code '{}' appears more than once in Variable '{}': {}.", codeAndValues.getKey(), variable, codeAndValues.getValue());
                    }
                }
            }
        }
    }
}
Also used : Codebook(gov.cms.bfd.model.codebook.model.Codebook) Variable(gov.cms.bfd.model.codebook.model.Variable) ValueGroup(gov.cms.bfd.model.codebook.model.ValueGroup) Value(gov.cms.bfd.model.codebook.model.Value) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) LinkedHashMap(java.util.LinkedHashMap) Test(org.junit.jupiter.api.Test)

Example 4 with Value

use of gov.cms.bfd.model.codebook.model.Value in project beneficiary-fhir-data by CMSgov.

the class PdfParser method parseValueGroups.

/**
 * Parses the coded values of a {@link Variable} raw text section into {@link ValueGroup}s. The
 * content of the values field is walked line by line: lines recognised as a value group
 * description open a new group, and all other lines are accumulated into {@link Value}s, each of
 * which starts on a line matching <code>PATTERN_VALUE_LINE_WITH_CODE</code>.
 *
 * @param variableSection the variable section to parse the value from
 * @return the {@link Variable#getValueGroups()} value from the specified {@link Variable} raw
 *     text section, or <code>null</code> if it was not present
 * @throws IllegalStateException if the values field cannot be extracted from the section
 * @throws BadCodeMonkeyException if description lines are left over after the last line, i.e.
 *     the field ended on a value group description with no values following it
 */
private static List<ValueGroup> parseValueGroups(List<String> variableSection) {
    /*
     * The parsing strategy here is basically this: 1) each Variable has EITHER a
     * valueFormat or valueGroups, 2) if the field value includes at least one
     * "XX = YY" line, it's a valueGroups, 3) otherwise (if it doesn't contain a
     * code list), it's a valueFormat.
     */
    String variableId = parseId(variableSection);
    List<String> fieldLines = extractFieldContent(variableSection, FIELD_NAME_VALUES, FIELD_NAME_VALUES_ALT1);
    if (fieldLines == null)
        throw new IllegalStateException(String.format("Invalid '%s' field in variable section: %s", FIELD_NAME_VALUES, variableSection));
    // Does this field have an "XX = YY" coded value?
    // NOTE(review): this scans every line of the whole variableSection, not just fieldLines, so a
    // matching line in some other field would also count. Confirm whether that is intended.
    boolean foundCodedValue = false;
    for (String line : variableSection) {
        if (PATTERN_VALUE_LINE_WITH_CODE.matcher(line).matches())
            foundCodedValue = true;
    }
    // No coded value lines at all: report "not present" to the caller.
    if (!foundCodedValue)
        return null;
    /*
     * Now we know we're dealing with coded values, so we need to parse those.
     */
    // Parser state: the groups completed so far, the group being filled, and the raw lines
    // buffered for the in-progress group description and the in-progress Value.
    List<ValueGroup> valueGroups = new ArrayList<>();
    ValueGroup currentValueGroup = new ValueGroup();
    List<String> currentValueGroupDescription = new ArrayList<>();
    List<String> currentValue = new ArrayList<>();
    for (int fieldLineIndex = 0; fieldLineIndex < fieldLines.size(); fieldLineIndex++) {
        String fieldLine = fieldLines.get(fieldLineIndex);
        if (isValueGroupDescription(variableId, fieldLines, fieldLineIndex)) {
            // A description line while Value lines are buffered means the previous group is done.
            // If nothing is buffered (e.g. consecutive description lines), nothing is flushed.
            if (!currentValue.isEmpty()) {
                // FYI: We just ended a Value that needs to be collected.
                Value completedValue = parseValue(currentValue);
                currentValueGroup.getValues().add(completedValue);
                currentValue = new ArrayList<>();
                // FYI: We also just ended a ValueGroup that needs to be collected.
                valueGroups.add(currentValueGroup);
                currentValueGroup = new ValueGroup();
            }
            // Regardless of what else is happening, always collect the line.
            currentValueGroupDescription.add(fieldLine);
        } else {
            // FYI: We're in a Value.
            boolean isLineStartOfValue = PATTERN_VALUE_LINE_WITH_CODE.matcher(fieldLine).matches();
            if (!currentValueGroupDescription.isEmpty()) {
                // FYI: We just ended a ValueGroup description that needs to be collected.
                List<String> valueGroupDescriptionParagraphs = extractParagraphs(currentValueGroupDescription);
                currentValueGroup.setDescription(valueGroupDescriptionParagraphs);
                currentValueGroupDescription = new ArrayList<>();
            }
            // A non-start line falls through and is appended to the Value already in progress
            // (continuation of a multi-line value).
            if (isLineStartOfValue && !currentValue.isEmpty()) {
                // FYI: We're starting a new Value and need to collect the previous one.
                Value completedValue = parseValue(currentValue);
                currentValueGroup.getValues().add(completedValue);
                currentValue = new ArrayList<>();
            }
            // Regardless of what else is happening, always collect the line.
            currentValue.add(fieldLine);
        }
    }
    if (!currentValue.isEmpty()) {
        // FYI: We're through all lines but need to collect the last Value.
        Value completedValue = parseValue(currentValue);
        currentValueGroup.getValues().add(completedValue);
        // FYI: We also just ended a ValueGroup that needs to be collected.
        valueGroups.add(currentValueGroup);
    }
    // Sanity check: make sure we don't have any leftovers.
    if (!currentValueGroupDescription.isEmpty())
        throw new BadCodeMonkeyException();
    return valueGroups;
}
Also used : BadCodeMonkeyException(gov.cms.bfd.sharedutils.exceptions.BadCodeMonkeyException) ValueGroup(gov.cms.bfd.model.codebook.model.ValueGroup) ArrayList(java.util.ArrayList) Value(gov.cms.bfd.model.codebook.model.Value)

Example 5 with Value

use of gov.cms.bfd.model.codebook.model.Value in project beneficiary-fhir-data by CMSgov.

the class PdfParser method parseValue.

/**
 * Parses a single {@link Value} from its raw text lines. The first line must be a coded value
 * line (i.e. match <code>PATTERN_VALUE_LINE_WITH_CODE</code>): its first capture group becomes
 * the code and its second capture group becomes the start of the description. Any remaining
 * lines are treated as wrapped continuation text, un-wrapped into paragraphs, and joined into a
 * single description string with one space between paragraphs.
 *
 * @param valueLines the lines of text representing a {@link Value} to be parsed; must be
 *     non-empty, and is not modified by this method
 * @return the {@link Value} parsed from those lines
 * @throws IllegalStateException if the first line is not a coded value line
 */
private static Value parseValue(List<String> valueLines) {
    // Copy the list so we can bang on it safely.
    List<String> valueLinesCopy = new ArrayList<>(valueLines);

    // Parse the first line, failing loudly (with the offending lines) if it is not a coded value
    // line, instead of letting Matcher.group(...) throw a bare "No match found" later on.
    Matcher valueStartMatcher = PATTERN_VALUE_LINE_WITH_CODE.matcher(valueLinesCopy.get(0));
    if (!valueStartMatcher.matches()) {
        throw new IllegalStateException(String.format("Value does not start with a coded value line: %s", valueLines));
    }

    // Grab the code from the first line.
    String code = valueStartMatcher.group(1);
    // Strip out the "XX = " prefix from the first line.
    valueLinesCopy.set(0, valueStartMatcher.group(2));

    // Convert it all to "paragraphs" to undo the line wrapping.
    List<String> paragraphs = extractParagraphs(valueLinesCopy);
    // Just in case multiple paragraphs were found, glue them back together.
    String description = String.join(" ", paragraphs);

    Value value = new Value();
    value.setCode(code);
    value.setDescription(description);
    return value;
}
Also used : Matcher(java.util.regex.Matcher) ArrayList(java.util.ArrayList) Value(gov.cms.bfd.model.codebook.model.Value)

Aggregations

Value (gov.cms.bfd.model.codebook.model.Value)6 Codebook (gov.cms.bfd.model.codebook.model.Codebook)3 Variable (gov.cms.bfd.model.codebook.model.Variable)3 ArrayList (java.util.ArrayList)3 List (java.util.List)3 ValueGroup (gov.cms.bfd.model.codebook.model.ValueGroup)2 BadCodeMonkeyException (gov.cms.bfd.sharedutils.exceptions.BadCodeMonkeyException)2 Test (org.junit.jupiter.api.Test)2 TemporalPrecisionEnum (ca.uhn.fhir.model.api.TemporalPrecisionEnum)1 IdDt (ca.uhn.fhir.model.primitive.IdDt)1 DataFormatException (ca.uhn.fhir.parser.DataFormatException)1 Constants (ca.uhn.fhir.rest.api.Constants)1 RequestDetails (ca.uhn.fhir.rest.api.server.RequestDetails)1 Strings (com.google.common.base.Strings)1 CcwCodebookMissingVariable (gov.cms.bfd.model.codebook.data.CcwCodebookMissingVariable)1 CcwCodebookVariable (gov.cms.bfd.model.codebook.data.CcwCodebookVariable)1 CcwCodebookInterface (gov.cms.bfd.model.codebook.model.CcwCodebookInterface)1 VariableType (gov.cms.bfd.model.codebook.model.VariableType)1 Beneficiary (gov.cms.bfd.model.rif.Beneficiary)1 CarrierClaim (gov.cms.bfd.model.rif.CarrierClaim)1