use of gov.cms.bfd.model.codebook.model.Variable in project beneficiary-fhir-data by CMSgov.
the class CodebookVariableReader method buildVariablesMappedById.
/**
 * Builds a lookup of the known {@link Codebook} {@link Variable}s, keyed by {@link
 * Variable#getId()}.
 *
 * <p>When several {@link Variable}s share an ID, only the first one found is kept. Duplicates are
 * tolerated for the IDs <code>BENE_ID</code>, <code>DOB_DT</code> and <code>GNDR_CD</code>, and
 * for any ID that occurs exactly twice; any other duplication is treated as an error.
 *
 * @return a {@link Map} of the known {@link Codebook} {@link Variable}s, keyed by {@link
 *     Variable#getId()} (with duplicates removed safely), in the iteration order of the
 *     underlying multimap
 * @throws IllegalStateException if an ID outside the tolerated set does not map to exactly one or
 *     two {@link Variable}s
 */
public static Map<String, Variable> buildVariablesMappedById() {
  Map<String, List<Variable>> variablesMultimapById = buildVariablesMultimappedById();
  Map<String, Variable> variablesMappedById = new LinkedHashMap<>(variablesMultimapById.size());

  // IDs that may legitimately repeat any number of times; built once rather than per iteration.
  List<String> idsAllowedToRepeat = Arrays.asList("BENE_ID", "DOB_DT", "GNDR_CD");

  for (Map.Entry<String, List<Variable>> entry : variablesMultimapById.entrySet()) {
    String id = entry.getKey();
    List<Variable> variablesForId = entry.getValue();

    /*
     * FIXME The code books for part a/b/d and a/b/c/d have
     * overlapping fields between them. They also have fields that
     * aren't in the other code book so we need to include both code
     * books ..codebook-mbsf-abd.pdf and codebook-mbsf-abcd.pdf.
     * Thus the reason to allow an ID to appear exactly twice here.
     */
    boolean acceptable =
        variablesForId.size() == 1
            || idsAllowedToRepeat.contains(id)
            || variablesForId.size() == 2;
    if (!acceptable) {
      throw new IllegalStateException(
          String.format("%s with duplicates found: %s", id, variablesForId));
    }

    // The first occurrence wins; any later duplicates are dropped.
    Variable variable = variablesForId.get(0);
    variablesMappedById.put(variable.getId(), variable);
  }
  return variablesMappedById;
}
use of gov.cms.bfd.model.codebook.model.Variable in project beneficiary-fhir-data by CMSgov.
the class PdfParserTest method parseCodebookPdf_CARR_LINE_PRVDR_TYPE_CD.
/**
 * Tests {@link
 * gov.cms.bfd.model.codebook.extractor.PdfParser#parseCodebookPdf(SupportedCodebook)} against
 * {@link gov.cms.bfd.model.codebook.extractor.SupportedCodebook#FFS_CLAIMS} for the <code>
 * CARR_LINE_PRVDR_TYPE_CD</code> variable.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void parseCodebookPdf_CARR_LINE_PRVDR_TYPE_CD() throws IOException {
  /*
   * Why are we spot checking this one variable's parsed output? Because it's
   * somewhat interesting: 1) it has multiple valueGroups, 2) many of its values
   * have multiple lines, 3) many of its coded values are duplicated, 4) it has a
   * COMMENT that's just "-".
   */
  Codebook codebook = PdfParser.parseCodebookPdf(SupportedCodebook.FFS_CLAIMS);

  // Fail with a descriptive message, rather than a bare NoSuchElementException, if the variable
  // was not parsed at all.
  Variable variable =
      codebook.getVariables().stream()
          .filter(v -> v.getId().equals("CARR_LINE_PRVDR_TYPE_CD"))
          .findAny()
          .orElseThrow(
              () -> new AssertionError("Variable not found in codebook: CARR_LINE_PRVDR_TYPE_CD"));

  String expectedDescription1 =
      "Code identifying the type of provider furnishing the service for this line"
          + " item on the carrier claim.";

  assertEquals("Carrier Line Provider Type Code", variable.getLabel());
  assertParagraphsEquals(Arrays.asList(expectedDescription1), variable.getDescription());
  assertEquals("PRV_TYPE", variable.getShortName().get());
  assertEquals("CARR_LINE_PRVDR_TYPE_CD", variable.getLongName());
  assertEquals(VariableType.CHAR, variable.getType().get());
  // Integer.valueOf replaces the deprecated Integer(int) constructor.
  assertEquals(Integer.valueOf(1), variable.getLength());
  assertEquals("NCH", variable.getSource().get());
  assertFalse(variable.getValueFormat().isPresent());

  // Both value groups should be present, each with its own description and values.
  assertEquals(2, variable.getValueGroups().get().size());
  assertEquals(8, variable.getValueGroups().get().get(0).getValues().size());
  assertParagraphsEquals(
      Arrays.asList("For Physician/Supplier Claims:"),
      variable.getValueGroups().get().get(0).getDescription());
  assertEquals(9, variable.getValueGroups().get().get(1).getValues().size());
  assertParagraphsEquals(
      Arrays.asList(
          "NOTE: PRIOR TO VERSION H, DME claims also used this code; the"
              + " following were valid code VALUES:"),
      variable.getValueGroups().get().get(1).getDescription());

  // Spot-check some of the values:
  Value value_0_3 = variable.getValueGroups().get().get(0).getValues().get(3);
  assertEquals("3", value_0_3.getCode());
  assertEquals("Institutional provider", value_0_3.getDescription());
  Value value_1_8 = variable.getValueGroups().get().get(1).getValues().get(8);
  assertEquals("8", value_1_8.getCode());
  assertEquals(
      "Other entities for whom EI numbers are used in coding the ID field or proprietorship"
          + " for whom EI numbers are used in coding the ID field.",
      value_1_8.getDescription());

  // The "-" COMMENT in the PDF is expected to be parsed as "no comment".
  assertFalse(variable.getComment().isPresent());
}
use of gov.cms.bfd.model.codebook.model.Variable in project beneficiary-fhir-data by CMSgov.
the class PdfParserTest method parseCodebookPdf.
/**
 * Smoke-tests {@link
 * gov.cms.bfd.model.codebook.extractor.PdfParser#parseCodebookPdf(SupportedCodebook)} by running
 * it over every {@link gov.cms.bfd.model.codebook.extractor.SupportedCodebook} value.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void parseCodebookPdf() throws IOException {
  for (SupportedCodebook codebookSource : SupportedCodebook.values()) {
    LOGGER.info("Looking for sections in codebook: {}", codebookSource.name());
    Codebook parsedCodebook = PdfParser.parseCodebookPdf(codebookSource);

    /*
     * Every PDF is exercised here, so this is primarily a "does parsing complete
     * without blowing up" check. A few basic facts about each result can still be
     * verified, though.
     */
    assertNotNull(parsedCodebook);

    // Note: The 2017-05 version of the PDE codebook has 56 variables.
    int variableCount = parsedCodebook.getVariables().size();
    assertTrue(variableCount > 50, "Not as many variables as expected: " + variableCount);

    // Each parsed variable must individually pass the shared validity checks.
    for (Variable parsedVariable : parsedCodebook.getVariables()) {
      assertVariableIsValid(parsedVariable);
    }
  }
}
use of gov.cms.bfd.model.codebook.model.Variable in project beneficiary-fhir-data by CMSgov.
the class PdfParserTest method parseCodebookPdf_DSH_OP_CLM_VAL_AMT.
/**
 * Tests {@link
 * gov.cms.bfd.model.codebook.extractor.PdfParser#parseCodebookPdf(SupportedCodebook)} against
 * {@link gov.cms.bfd.model.codebook.extractor.SupportedCodebook#FFS_CLAIMS} for the <code>
 * DSH_OP_CLM_VAL_AMT</code> variable.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void parseCodebookPdf_DSH_OP_CLM_VAL_AMT() throws IOException {
  /*
   * Why are we spot checking this one variable's parsed output? Because it's
   * somewhat interesting: 1) it has multiple paragraphs in its DESCRIPTION and
   * COMMENT fields, 2) it's an example of a simple valueFormat, 3) its COMMENT
   * has a line that looks like a new field, but isn't, and 4) its COMMENT also
   * has a long URL that line breaks with a hyphen.
   */
  Codebook codebook = PdfParser.parseCodebookPdf(SupportedCodebook.FFS_CLAIMS);

  // Fail with a descriptive message, rather than a bare NoSuchElementException, if the variable
  // was not parsed at all.
  Variable variable =
      codebook.getVariables().stream()
          .filter(v -> v.getId().equals("DSH_OP_CLM_VAL_AMT"))
          .findAny()
          .orElseThrow(
              () -> new AssertionError("Variable not found in codebook: DSH_OP_CLM_VAL_AMT"));

  String expectedDescription1 =
      "This is one component of the total amount that is payable on prospective"
          + " payment system (PPS) claims, and reflects the DSH (disproportionate share hospital) payments"
          + " for operating expenses (such as labor) for the claim.";
  String expectedDescription2 =
      "There are two types of DSH amounts that may be payable for many PPS claims;"
          + " the other type of DSH payment is for the DSH capital amount (variable called"
          + " CLM_PPS_CPTL_DSPRPRTNT_SHR_AMT).";
  String expectedDescription3 =
      "Both operating and capital DSH payments are components of the PPS, as well"
          + " as numerous other factors.";
  String expectedComment1 =
      "Medicare payments are described in detail in a series of Medicare Payment"
          + " Advisory Commission (MedPAC) documents called “Payment Basics” (see:"
          + " http://www.medpac.gov/payment_basics.cfm).";
  String expectedComment2 =
      "Also in the Medicare Learning Network (MLN) “Payment System Fact Sheet Series”"
          + " (see: "
          + "http://www.cms.gov/Outreach-and-Education/Medicare-Learning-Network-MLN/MLNProducts/MLN-Publications.html"
          + ").";
  String expectedComment3 =
      "DERIVATION RULES: If there is a value code '18' (i.e., in the Value Code File, if the"
          + " VAL_CD='18') then this dollar amount (VAL_AMT) is used to populate this field.\"";

  assertEquals("Operating Disproportionate Share (DSH) Amount", variable.getLabel());
  assertParagraphsEquals(
      Arrays.asList(expectedDescription1, expectedDescription2, expectedDescription3),
      variable.getDescription());
  assertEquals("DSH_OP", variable.getShortName().get());
  assertEquals("DSH_OP_CLM_VAL_AMT", variable.getLongName());
  assertEquals(VariableType.NUM, variable.getType().get());
  // Integer.valueOf replaces the deprecated Integer(int) constructor.
  assertEquals(Integer.valueOf(12), variable.getLength());
  assertEquals("NCH", variable.getSource().get());
  assertEquals("XXX.XX", variable.getValueFormat().get());
  assertFalse(variable.getValueGroups().isPresent());
  assertParagraphsEquals(
      Arrays.asList(expectedComment1, expectedComment2, expectedComment3),
      variable.getComment());
}
use of gov.cms.bfd.model.codebook.model.Variable in project beneficiary-fhir-data by CMSgov.
the class PdfParser method parseCodebookPdf.
/**
 * Parses the PDF of the specified codebook into its {@link Codebook} model: the PDF text is
 * extracted as lines, the lines are grouped into one section per variable, and each section is
 * parsed into a {@link Variable} that is appended to the result.
 *
 * @param codebookSource the codebook to be converted
 * @return a {@link Codebook} instance representing the data from the parsed codebook PDF
 * @throws UncheckedIOException wrapping any {@link IOException} raised while reading the PDF
 */
public static Codebook parseCodebookPdf(SupportedCodebook codebookSource) {
  try (InputStream pdfStream = codebookSource.getCodebookPdfInputStream()) {
    List<String> textLines = extractTextLinesFromPdf(pdfStream);
    Codebook result = new Codebook(codebookSource);

    /*
     * Two passes are used on purpose: first the lines are split into per-variable
     * sections, then each section is parsed into its Variable. Slightly wasteful,
     * but it keeps the parsing logic easy to follow.
     */
    for (List<String> sectionLines : findVariableSections(textLines)) {
      Variable parsedVariable = parseVariable(result, sectionLines);
      result.getVariables().add(parsedVariable);
    }

    return result;
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
}
Aggregations