Search in sources :

Example 1 with Codebook

use of gov.cms.bfd.model.codebook.model.Codebook in project beneficiary-fhir-data by CMSgov.

the class PdfParserTest method parseCodebookPdf_CARR_LINE_PRVDR_TYPE_CD.

/**
 * Tests {@link
 * gov.cms.bfd.model.codebook.extractor.PdfParser#parseCodebookPdf(SupportedCodebook)} against
 * {@link gov.cms.bfd.model.codebook.extractor.SupportedCodebook#FFS_CLAIMS} for the <code>
 * CARR_LINE_PRVDR_TYPE_CD</code> variable, spot checking its label, description, metadata fields,
 * value groups and a couple of individual coded values.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void parseCodebookPdf_CARR_LINE_PRVDR_TYPE_CD() throws IOException {
    /*
     * Why are we spot checking this one variable's parsed output? Because it's
     * somewhat interesting: 1) it has multiple valueGroups, 2) many of its values
     * have multiple lines, 3) many of its coded values are duplicated, 4) it has a
     * COMMENT that's just "-".
     */
    Codebook codebook = PdfParser.parseCodebookPdf(SupportedCodebook.FFS_CLAIMS);
    // Fail with a descriptive error (rather than a bare NoSuchElementException) if the variable is missing.
    Variable variable = codebook.getVariables().stream()
            .filter(v -> v.getId().equals("CARR_LINE_PRVDR_TYPE_CD"))
            .findAny()
            .orElseThrow(() -> new AssertionError("Variable not found in parsed codebook: CARR_LINE_PRVDR_TYPE_CD"));
    String expectedDescription1 = "Code identifying the type of provider furnishing the service for this line"
            + " item on the carrier claim.";

    // Top-level fields.
    assertEquals("Carrier Line Provider Type Code", variable.getLabel());
    assertParagraphsEquals(Arrays.asList(expectedDescription1), variable.getDescription());
    assertEquals("PRV_TYPE", variable.getShortName().get());
    assertEquals("CARR_LINE_PRVDR_TYPE_CD", variable.getLongName());
    assertEquals(VariableType.CHAR, variable.getType().get());
    // Integer.valueOf(...) replaces the deprecated Integer(int) constructor.
    assertEquals(Integer.valueOf(1), variable.getLength());
    assertEquals("NCH", variable.getSource().get());
    assertFalse(variable.getValueFormat().isPresent());

    // Value groups: two are expected, each with its own description.
    assertEquals(2, variable.getValueGroups().get().size());
    assertEquals(8, variable.getValueGroups().get().get(0).getValues().size());
    assertParagraphsEquals(
            Arrays.asList("For Physician/Supplier Claims:"),
            variable.getValueGroups().get().get(0).getDescription());
    assertEquals(9, variable.getValueGroups().get().get(1).getValues().size());
    assertParagraphsEquals(
            Arrays.asList("NOTE: PRIOR TO VERSION H, DME claims also used this code; the"
                    + " following were valid code VALUES:"),
            variable.getValueGroups().get().get(1).getDescription());

    // Spot-check some of the values:
    Value value_0_3 = variable.getValueGroups().get().get(0).getValues().get(3);
    assertEquals("3", value_0_3.getCode());
    assertEquals("Institutional provider", value_0_3.getDescription());
    Value value_1_8 = variable.getValueGroups().get().get(1).getValues().get(8);
    assertEquals("8", value_1_8.getCode());
    assertEquals(
            "Other entities for whom EI numbers are used in coding the ID field or proprietorship"
                    + " for whom EI numbers are used in coding the ID field.",
            value_1_8.getDescription());

    // The COMMENT in the PDF is just "-" (see above), so no comment is expected in the parsed result.
    assertFalse(variable.getComment().isPresent());
}
Also used : Codebook(gov.cms.bfd.model.codebook.model.Codebook) Variable(gov.cms.bfd.model.codebook.model.Variable) Value(gov.cms.bfd.model.codebook.model.Value) Test(org.junit.jupiter.api.Test)

Example 2 with Codebook

use of gov.cms.bfd.model.codebook.model.Codebook in project beneficiary-fhir-data by CMSgov.

the class PdfParserTest method parseCodebookPdf.

/**
 * Runs {@link
 * gov.cms.bfd.model.codebook.extractor.PdfParser#parseCodebookPdf(SupportedCodebook)} over every
 * {@link gov.cms.bfd.model.codebook.extractor.SupportedCodebook} and performs basic sanity checks
 * on each parsed result.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void parseCodebookPdf() throws IOException {
    for (SupportedCodebook codebookSource : SupportedCodebook.values()) {
        LOGGER.info("Looking for sections in codebook: {}", codebookSource.name());
        Codebook parsedCodebook = PdfParser.parseCodebookPdf(codebookSource);

        /*
         * This runs against all of the PDFs, so it is mostly a "make sure things
         * don't blow up" test case. That's fine, but some basic facts about the
         * results can still be verified.
         */
        assertNotNull(parsedCodebook);

        // Note: The 2017-05 version of the PDE codebook has 56 variables.
        int variableCount = parsedCodebook.getVariables().size();
        assertTrue(variableCount > 50, "Not as many variables as expected: " + variableCount);

        for (Variable parsedVariable : parsedCodebook.getVariables()) {
            assertVariableIsValid(parsedVariable);
        }
    }
}
Also used : Codebook(gov.cms.bfd.model.codebook.model.Codebook) Variable(gov.cms.bfd.model.codebook.model.Variable) Test(org.junit.jupiter.api.Test)

Example 3 with Codebook

use of gov.cms.bfd.model.codebook.model.Codebook in project beneficiary-fhir-data by CMSgov.

the class PdfParserTest method parseCodebookPdf_DSH_OP_CLM_VAL_AMT.

/**
 * Tests {@link
 * gov.cms.bfd.model.codebook.extractor.PdfParser#parseCodebookPdf(SupportedCodebook)} against
 * {@link gov.cms.bfd.model.codebook.extractor.SupportedCodebook#FFS_CLAIMS} for the <code>
 * DSH_OP_CLM_VAL_AMT</code> variable, spot checking its label, multi-paragraph description and
 * comment, metadata fields and value format.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void parseCodebookPdf_DSH_OP_CLM_VAL_AMT() throws IOException {
    /*
     * Why are we spot checking this one variable's parsed output? Because it's
     * somewhat interesting: 1) it has multiple paragraphs in its DESCRIPTION and
     * COMMENT fields, 2) it's an example of a simple valueFormat, 3) its COMMENT
     * has a line that looks like a new field, but isn't, and 4) its COMMENT also
     * has a long URL that line breaks with a hyphen.
     */
    Codebook codebook = PdfParser.parseCodebookPdf(SupportedCodebook.FFS_CLAIMS);
    // Fail with a descriptive error (rather than a bare NoSuchElementException) if the variable is missing.
    Variable variable = codebook.getVariables().stream()
            .filter(v -> v.getId().equals("DSH_OP_CLM_VAL_AMT"))
            .findAny()
            .orElseThrow(() -> new AssertionError("Variable not found in parsed codebook: DSH_OP_CLM_VAL_AMT"));

    // Expected DESCRIPTION paragraphs, in order.
    String expectedDescription1 = "This is one component of the total amount that is payable on prospective"
            + " payment system (PPS) claims, and reflects the DSH (disproportionate share hospital) payments"
            + " for operating expenses (such as labor) for the claim.";
    String expectedDescription2 = "There are two types of DSH amounts that may be payable for many PPS claims;"
            + " the other type of DSH payment is for the DSH capital amount (variable called"
            + " CLM_PPS_CPTL_DSPRPRTNT_SHR_AMT).";
    String expectedDescription3 = "Both operating and capital DSH payments are components of the PPS, as well"
            + " as numerous other factors.";

    // Expected COMMENT paragraphs, in order.
    String expectedComment1 = "Medicare payments are described in detail in a series of Medicare Payment"
            + " Advisory Commission (MedPAC) documents called “Payment Basics” (see:"
            + " http://www.medpac.gov/payment_basics.cfm).";
    String expectedComment2 = "Also in the Medicare Learning Network (MLN) “Payment System Fact Sheet Series”"
            + " (see: "
            + "http://www.cms.gov/Outreach-and-Education/Medicare-Learning-Network-MLN/MLNProducts/MLN-Publications.html"
            + ").";
    String expectedComment3 = "DERIVATION RULES: If there is a value code '18' (i.e., in the Value Code File, if the"
            + " VAL_CD='18') then this dollar amount (VAL_AMT) is used to populate this field.\"";

    assertEquals("Operating Disproportionate Share (DSH) Amount", variable.getLabel());
    assertParagraphsEquals(
            Arrays.asList(expectedDescription1, expectedDescription2, expectedDescription3),
            variable.getDescription());
    assertEquals("DSH_OP", variable.getShortName().get());
    assertEquals("DSH_OP_CLM_VAL_AMT", variable.getLongName());
    assertEquals(VariableType.NUM, variable.getType().get());
    // Integer.valueOf(...) replaces the deprecated Integer(int) constructor.
    assertEquals(Integer.valueOf(12), variable.getLength());
    assertEquals("NCH", variable.getSource().get());
    assertEquals("XXX.XX", variable.getValueFormat().get());
    assertFalse(variable.getValueGroups().isPresent());
    assertParagraphsEquals(
            Arrays.asList(expectedComment1, expectedComment2, expectedComment3),
            variable.getComment());
}
Also used : Codebook(gov.cms.bfd.model.codebook.model.Codebook) Variable(gov.cms.bfd.model.codebook.model.Variable) Test(org.junit.jupiter.api.Test)

Example 4 with Codebook

use of gov.cms.bfd.model.codebook.model.Codebook in project beneficiary-fhir-data by CMSgov.

the class PdfParserTest method findVariableSections.

/**
 * Tests {@link gov.cms.bfd.model.codebook.extractor.PdfParser#findVariableSections(List)} against
 * all {@link gov.cms.bfd.model.codebook.extractor.SupportedCodebook}s.
 *
 * <p>For each codebook, verifies that every section found is non-null and has at least 10 lines,
 * and that every <code>SHORT_NAME:</code> line in the extracted text appears in some section.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void findVariableSections() throws IOException {
    for (SupportedCodebook supportedCodebook : SupportedCodebook.values()) {
        try (InputStream codebookPdfStream = supportedCodebook.getCodebookPdfInputStream()) {
            LOGGER.info("Looking for sections in codebook: {}", supportedCodebook.name());
            /*
             * Note: We leave the printXXX(...) calls here disabled unless/until they're
             * needed to debug a specific problem, as they add a ton of log noise.
             */
            List<String> codebookTextLines = PdfParser.extractTextLinesFromPdf(codebookPdfStream);
            // printTextLinesToConsole(codebookTextLines);
            List<List<String>> variableSections = PdfParser.findVariableSections(codebookTextLines);
            for (List<String> variableSection : variableSections) {
                assertNotNull(variableSection);
                assertTrue(
                        variableSection.size() >= 10,
                        "Variable section has fewer lines than expected: " + variableSection.size());
            }
            /*
             * How else can you verify that the section splitting code worked correctly?
             * Pick a one-line field that should have a unique value in each section, find
             * all instances of that field in the un-grouped lines, then make sure that each
             * one of those unique field lines can be found in a section.
             */
            Predicate<String> searchFieldFilter = l -> l.startsWith("SHORT_NAME:");
            List<String> searchFieldLines = codebookTextLines.stream()
                    .filter(searchFieldFilter)
                    .collect(Collectors.toList());
            // If this fails, we need to pick a different search field.
            assertEquals(
                    searchFieldLines.size(),
                    new HashSet<>(searchFieldLines).size(),
                    "Not all instances of that field are unique.");
            for (String searchFieldLine : searchFieldLines) {
                // Stops at the first section containing the line instead of scanning every section.
                boolean foundSection = variableSections.stream()
                        .anyMatch(variableSection -> variableSection.contains(searchFieldLine));
                assertTrue(foundSection, String.format("Can't find search field line: '%s'", searchFieldLine));
            }
        }
    }
}
Also used : Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) Arrays(java.util.Arrays) Logger(org.slf4j.Logger) Files(java.nio.file.Files) Predicate(java.util.function.Predicate) Variable(gov.cms.bfd.model.codebook.model.Variable) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) FileWriter(java.io.FileWriter) LoggerFactory(org.slf4j.LoggerFactory) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Codebook(gov.cms.bfd.model.codebook.model.Codebook) UncheckedIOException(java.io.UncheckedIOException) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test) List(java.util.List) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) VariableType(gov.cms.bfd.model.codebook.model.VariableType) Optional(java.util.Optional) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) Value(gov.cms.bfd.model.codebook.model.Value) Path(java.nio.file.Path) InputStream(java.io.InputStream) InputStream(java.io.InputStream) List(java.util.List) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test)

Example 5 with Codebook

use of gov.cms.bfd.model.codebook.model.Codebook in project beneficiary-fhir-data by CMSgov.

the class PdfParser method parseCodebookPdf.

/**
 * Parses the PDF of the specified codebook into a {@link Codebook} model: the PDF's text lines are
 * extracted, grouped into per-variable sections, and each section is parsed into a {@link
 * Variable} that is added to the result.
 *
 * @param codebookSource the codebook to be converted
 * @return a {@link Codebook} instance representing the data from the parsed codebook PDF
 * @throws UncheckedIOException wraps any {@link IOException} raised while reading the PDF
 */
public static Codebook parseCodebookPdf(SupportedCodebook codebookSource) {
    try (InputStream pdfStream = codebookSource.getCodebookPdfInputStream()) {
        List<String> textLines = extractTextLinesFromPdf(pdfStream);
        Codebook result = new Codebook(codebookSource);

        /*
         * Slightly inefficient, but all of the lines are first grouped into the
         * separate variable declarations they represent, and only then is each group
         * parsed into its Variable model. This keeps the logic easier to reason about.
         */
        for (List<String> sectionLines : findVariableSections(textLines)) {
            Variable parsedVariable = parseVariable(result, sectionLines);
            result.getVariables().add(parsedVariable);
        }

        return result;
    } catch (IOException ioFailure) {
        throw new UncheckedIOException(ioFailure);
    }
}
Also used : Codebook(gov.cms.bfd.model.codebook.model.Codebook) Variable(gov.cms.bfd.model.codebook.model.Variable) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) List(java.util.List) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException)

Aggregations

Codebook (gov.cms.bfd.model.codebook.model.Codebook)11 Variable (gov.cms.bfd.model.codebook.model.Variable)10 Test (org.junit.jupiter.api.Test)7 List (java.util.List)5 UncheckedIOException (java.io.UncheckedIOException)4 ArrayList (java.util.ArrayList)4 Value (gov.cms.bfd.model.codebook.model.Value)3 IOException (java.io.IOException)3 InputStream (java.io.InputStream)3 LinkedHashMap (java.util.LinkedHashMap)3 SupportedCodebook (gov.cms.bfd.model.codebook.extractor.SupportedCodebook)2 Path (java.nio.file.Path)2 ValueGroup (gov.cms.bfd.model.codebook.model.ValueGroup)1 VariableType (gov.cms.bfd.model.codebook.model.VariableType)1 UncheckedJaxbException (gov.cms.bfd.sharedutils.exceptions.UncheckedJaxbException)1 FileWriter (java.io.FileWriter)1 InputStreamReader (java.io.InputStreamReader)1 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1 URL (java.net.URL)1 URLClassLoader (java.net.URLClassLoader)1