use of gov.cms.bfd.model.codebook.model.Codebook in project beneficiary-fhir-data by CMSgov.
the class PdfParserTest method parseCodebookPdf_CARR_LINE_PRVDR_TYPE_CD.
/**
 * Tests {@link
 * gov.cms.bfd.model.codebook.extractor.PdfParser#parseCodebookPdf(SupportedCodebook)} against
 * {@link gov.cms.bfd.model.codebook.extractor.SupportedCodebook#FFS_CLAIMS} for the <code>
 * CARR_LINE_PRVDR_TYPE_CD</code> variable.
 *
 * <p>Spot-checks the parsed label, description, names, type, length, source, value groups and
 * comment of that single variable.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void parseCodebookPdf_CARR_LINE_PRVDR_TYPE_CD() throws IOException {
  /*
   * Why are we spot checking this one variable's parsed output? Because it's
   * somewhat interesting: 1) it has multiple valueGroups, 2) many of its values
   * have multiple lines, 3) many of its coded values are duplicated, 4) it has a
   * COMMENT that's just "-".
   */
  Codebook codebook = PdfParser.parseCodebookPdf(SupportedCodebook.FFS_CLAIMS);

  // Fail with a descriptive assertion error (rather than a bare NoSuchElementException) if the
  // variable is missing from the parsed codebook.
  Variable variable =
      codebook.getVariables().stream()
          .filter(v -> v.getId().equals("CARR_LINE_PRVDR_TYPE_CD"))
          .findAny()
          .orElseThrow(
              () -> new AssertionError("Variable not found in codebook: CARR_LINE_PRVDR_TYPE_CD"));

  String expectedDescription1 =
      "Code identifying the type of provider furnishing the service for this line"
          + " item on the carrier claim.";

  assertEquals("Carrier Line Provider Type Code", variable.getLabel());
  assertParagraphsEquals(Arrays.asList(expectedDescription1), variable.getDescription());
  assertEquals("PRV_TYPE", variable.getShortName().get());
  assertEquals("CARR_LINE_PRVDR_TYPE_CD", variable.getLongName());
  assertEquals(VariableType.CHAR, variable.getType().get());
  // Integer.valueOf(...) replaces the deprecated Integer(int) constructor.
  assertEquals(Integer.valueOf(1), variable.getLength());
  assertEquals("NCH", variable.getSource().get());
  assertFalse(variable.getValueFormat().isPresent());

  // The variable is expected to have two value groups, each with its own description.
  assertEquals(2, variable.getValueGroups().get().size());
  assertEquals(8, variable.getValueGroups().get().get(0).getValues().size());
  assertParagraphsEquals(
      Arrays.asList("For Physician/Supplier Claims:"),
      variable.getValueGroups().get().get(0).getDescription());
  assertEquals(9, variable.getValueGroups().get().get(1).getValues().size());
  assertParagraphsEquals(
      Arrays.asList(
          "NOTE: PRIOR TO VERSION H, DME claims also used this code; the"
              + " following were valid code VALUES:"),
      variable.getValueGroups().get().get(1).getDescription());

  // Spot-check some of the values:
  Value value_0_3 = variable.getValueGroups().get().get(0).getValues().get(3);
  assertEquals("3", value_0_3.getCode());
  assertEquals("Institutional provider", value_0_3.getDescription());
  Value value_1_8 = variable.getValueGroups().get().get(1).getValues().get(8);
  assertEquals("8", value_1_8.getCode());
  assertEquals(
      "Other entities for whom EI numbers are used in coding the ID field or proprietorship"
          + " for whom EI numbers are used in coding the ID field.",
      value_1_8.getDescription());

  // A COMMENT of just "-" is expected to be parsed as absent.
  assertFalse(variable.getComment().isPresent());
}
use of gov.cms.bfd.model.codebook.model.Codebook in project beneficiary-fhir-data by CMSgov.
the class PdfParserTest method parseCodebookPdf.
/**
 * Runs {@link
 * gov.cms.bfd.model.codebook.extractor.PdfParser#parseCodebookPdf(SupportedCodebook)} over every
 * {@link gov.cms.bfd.model.codebook.extractor.SupportedCodebook} and sanity-checks each result.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void parseCodebookPdf() throws IOException {
  for (SupportedCodebook codebookSource : SupportedCodebook.values()) {
    LOGGER.info("Looking for sections in codebook: {}", codebookSource.name());
    Codebook parsed = PdfParser.parseCodebookPdf(codebookSource);

    /*
     * This runs against every PDF, so it is primarily a smoke test ("nothing blows up").
     * Even so, a few basic facts about each result can still be verified.
     */
    assertNotNull(parsed);

    // Note: The 2017-05 version of the PDE codebook has 56 variables.
    int variableCount = parsed.getVariables().size();
    assertTrue(variableCount > 50, "Not as many variables as expected: " + variableCount);

    // Every parsed variable must pass the shared validity checks.
    for (Variable parsedVariable : parsed.getVariables()) {
      assertVariableIsValid(parsedVariable);
    }
  }
}
use of gov.cms.bfd.model.codebook.model.Codebook in project beneficiary-fhir-data by CMSgov.
the class PdfParserTest method parseCodebookPdf_DSH_OP_CLM_VAL_AMT.
/**
 * Tests {@link
 * gov.cms.bfd.model.codebook.extractor.PdfParser#parseCodebookPdf(SupportedCodebook)} against
 * {@link gov.cms.bfd.model.codebook.extractor.SupportedCodebook#FFS_CLAIMS} for the <code>
 * DSH_OP_CLM_VAL_AMT</code> variable.
 *
 * <p>Spot-checks the parsed label, multi-paragraph description and comment, names, type, length,
 * source and value format of that single variable.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void parseCodebookPdf_DSH_OP_CLM_VAL_AMT() throws IOException {
  /*
   * Why are we spot checking this one variable's parsed output? Because it's
   * somewhat interesting: 1) it has multiple paragraphs in its DESCRIPTION and
   * COMMENT fields, 2) it's an example of a simple valueFormat, 3) its COMMENT
   * has a line that looks like a new field, but isn't, and 4) its COMMENT also
   * has a long URL that line breaks with a hyphen.
   */
  Codebook codebook = PdfParser.parseCodebookPdf(SupportedCodebook.FFS_CLAIMS);

  // Fail with a descriptive assertion error (rather than a bare NoSuchElementException) if the
  // variable is missing from the parsed codebook.
  Variable variable =
      codebook.getVariables().stream()
          .filter(v -> v.getId().equals("DSH_OP_CLM_VAL_AMT"))
          .findAny()
          .orElseThrow(
              () -> new AssertionError("Variable not found in codebook: DSH_OP_CLM_VAL_AMT"));

  String expectedDescription1 =
      "This is one component of the total amount that is payable on prospective"
          + " payment system (PPS) claims, and reflects the DSH (disproportionate share hospital) payments"
          + " for operating expenses (such as labor) for the claim.";
  String expectedDescription2 =
      "There are two types of DSH amounts that may be payable for many PPS claims;"
          + " the other type of DSH payment is for the DSH capital amount (variable called"
          + " CLM_PPS_CPTL_DSPRPRTNT_SHR_AMT).";
  String expectedDescription3 =
      "Both operating and capital DSH payments are components of the PPS, as well"
          + " as numerous other factors.";
  String expectedComment1 =
      "Medicare payments are described in detail in a series of Medicare Payment"
          + " Advisory Commission (MedPAC) documents called “Payment Basics” (see:"
          + " http://www.medpac.gov/payment_basics.cfm).";
  String expectedComment2 =
      "Also in the Medicare Learning Network (MLN) “Payment System Fact Sheet Series”"
          + " (see: "
          + "http://www.cms.gov/Outreach-and-Education/Medicare-Learning-Network-MLN/MLNProducts/MLN-Publications.html"
          + ").";
  String expectedComment3 =
      "DERIVATION RULES: If there is a value code '18' (i.e., in the Value Code File, if the"
          + " VAL_CD='18') then this dollar amount (VAL_AMT) is used to populate this field.\"";

  assertEquals("Operating Disproportionate Share (DSH) Amount", variable.getLabel());
  assertParagraphsEquals(
      Arrays.asList(expectedDescription1, expectedDescription2, expectedDescription3),
      variable.getDescription());
  assertEquals("DSH_OP", variable.getShortName().get());
  assertEquals("DSH_OP_CLM_VAL_AMT", variable.getLongName());
  assertEquals(VariableType.NUM, variable.getType().get());
  // Integer.valueOf(...) replaces the deprecated Integer(int) constructor.
  assertEquals(Integer.valueOf(12), variable.getLength());
  assertEquals("NCH", variable.getSource().get());
  assertEquals("XXX.XX", variable.getValueFormat().get());
  assertFalse(variable.getValueGroups().isPresent());
  assertParagraphsEquals(
      Arrays.asList(expectedComment1, expectedComment2, expectedComment3),
      variable.getComment());
}
use of gov.cms.bfd.model.codebook.model.Codebook in project beneficiary-fhir-data by CMSgov.
the class PdfParserTest method findVariableSections.
/**
 * Exercises {@link gov.cms.bfd.model.codebook.extractor.PdfParser#findVariableSections(List)}
 * for every {@link gov.cms.bfd.model.codebook.extractor.SupportedCodebook}, checking that each
 * section is non-null and has at least 10 lines, and that every <code>SHORT_NAME:</code> line in
 * the raw text ends up in some section.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void findVariableSections() throws IOException {
  for (SupportedCodebook supportedCodebook : SupportedCodebook.values()) {
    try (InputStream pdfStream = supportedCodebook.getCodebookPdfInputStream()) {
      LOGGER.info("Looking for sections in codebook: {}", supportedCodebook.name());

      /*
       * Note: The printXXX(...) calls stay disabled unless/until they are needed to debug
       * a specific problem, because they add a ton of log noise.
       */
      List<String> allLines = PdfParser.extractTextLinesFromPdf(pdfStream);
      // printTextLinesToConsole(allLines);

      List<List<String>> sections = PdfParser.findVariableSections(allLines);
      for (List<String> section : sections) {
        assertNotNull(section);
        assertTrue(section.size() >= 10);
      }

      /*
       * To verify the section splitting beyond basic sanity: pick a one-line field whose
       * value should be unique per section, collect every occurrence of it from the
       * un-grouped lines, and then require that each occurrence appears in some section.
       */
      Predicate<? super String> isSearchField = l -> l.startsWith("SHORT_NAME:");
      List<String> fieldLines =
          allLines.stream().filter(isSearchField).collect(Collectors.toList());

      // If this fails, we need to pick a different search field.
      assertEquals(
          fieldLines.size(),
          new HashSet<>(fieldLines).size(),
          "Not all instances of that field are unique.");

      for (String fieldLine : fieldLines) {
        // True when any line of any section equals the field line.
        boolean foundSection =
            sections.stream().flatMap(List::stream).anyMatch(fieldLine::equals);
        assertTrue(
            foundSection, String.format("Can't find search field line: '%s'", fieldLine));
      }
    }
  }
}
use of gov.cms.bfd.model.codebook.model.Codebook in project beneficiary-fhir-data by CMSgov.
the class PdfParser method parseCodebookPdf.
/**
 * Parses the PDF of the specified codebook into a {@link Codebook} model: extracts the PDF's
 * text lines, splits them into per-variable sections, and parses each section into a {@link
 * Variable} that is added to the result.
 *
 * @param codebookSource the codebook to be converted
 * @return a {@link Codebook} instance representing the data from the parsed codebook PDF
 * @throws UncheckedIOException if an {@link IOException} occurs while reading the PDF
 */
public static Codebook parseCodebookPdf(SupportedCodebook codebookSource) {
  try (InputStream pdfInput = codebookSource.getCodebookPdfInputStream()) {
    List<String> pdfLines = extractTextLinesFromPdf(pdfInput);
    Codebook result = new Codebook(codebookSource);

    /*
     * Slightly inefficient, but the lines are first grouped into the separate variable
     * declarations they represent; each group is then parsed into its Variable model.
     * This keeps the logic easier to reason about.
     */
    for (List<String> section : findVariableSections(pdfLines)) {
      Variable parsedVariable = parseVariable(result, section);
      result.getVariables().add(parsedVariable);
    }

    return result;
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
}
Aggregations