Example use of gov.cms.bfd.model.codebook.model.ValueGroup in the project beneficiary-fhir-data by CMSgov.
Shown below: the method findDuplicateCodes of the class SupportedCodebookTest.
/**
 * Runs {@link gov.cms.bfd.model.codebook.extractor.PdfParser} over every {@link
 * gov.cms.bfd.model.codebook.extractor.SupportedCodebook} and reports each {@link Value#getCode()}
 * that occurs more than once within a single {@link Variable}.
 *
 * <p>Duplicates are only reported through {@code LOGGER.warn}; this method makes no assertions, so
 * a duplicate does not by itself fail the test.
 *
 * @throws IOException Indicates test error.
 */
@Test
public void findDuplicateCodes() throws IOException {
  for (SupportedCodebook supportedCodebook : SupportedCodebook.values()) {
    Codebook parsedCodebook = PdfParser.parseCodebookPdf(supportedCodebook);

    for (Variable variable : parsedCodebook.getVariables()) {
      // Variables without any value groups have no codes to compare.
      if (variable.getValueGroups().isPresent()) {
        // Group every Value of this Variable under its code, keeping first-seen order.
        Map<String, List<Value>> codeToValues = new LinkedHashMap<>();
        for (ValueGroup group : variable.getValueGroups().get()) {
          for (Value candidate : group.getValues()) {
            codeToValues.computeIfAbsent(candidate.getCode(), unused -> new ArrayList<>()).add(candidate);
          }
        }

        // Any code that collected more than one Value is a duplicate: log it in detail.
        for (Map.Entry<String, List<Value>> entry : codeToValues.entrySet()) {
          List<Value> sharingValues = entry.getValue();
          if (sharingValues.size() > 1) {
            LOGGER.warn("The code '{}' appears more than once in Variable '{}': {}.", entry.getKey(), variable, sharingValues);
          }
        }
      }
    }
  }
}
Example use of gov.cms.bfd.model.codebook.model.ValueGroup in the project beneficiary-fhir-data by CMSgov.
Shown below: the method parseValueGroups of the class PdfParser.
/**
 * Parses the coded values of a {@link Variable} out of its raw text section and returns them
 * organized into {@link ValueGroup}s.
 *
 * <p>Each line of the values field is classified by {@code isValueGroupDescription(...)}: lines it
 * accepts are accumulated as the description of the next group, all other lines are accumulated
 * into the current {@link Value}. A line matching {@code PATTERN_VALUE_LINE_WITH_CODE} starts a
 * new {@link Value}; a description line that follows a value closes the current group.
 *
 * @param variableSection the variable section to parse the value from
 * @return the {@link Variable#getValueGroups()} value from the specified {@link Variable} raw
 *     text section, or {@code null} if no line of the section matches {@code
 *     PATTERN_VALUE_LINE_WITH_CODE}
 * @throws IllegalStateException if the values field cannot be extracted from the section
 * @throws BadCodeMonkeyException if description lines are left over after the last line, i.e. the
 *     field ends with a group description that is not followed by any value
 */
private static List<ValueGroup> parseValueGroups(List<String> variableSection) {
  /*
   * Strategy: a Variable carries EITHER a valueFormat OR valueGroups. If at
   * least one "XX = YY" line is found, the content is treated as valueGroups;
   * if none is found, this method yields null (no valueGroups).
   */
  String variableId = parseId(variableSection);
  List<String> valuesLines = extractFieldContent(variableSection, FIELD_NAME_VALUES, FIELD_NAME_VALUES_ALT1);
  if (valuesLines == null) {
    throw new IllegalStateException(String.format("Invalid '%s' field in variable section: %s", FIELD_NAME_VALUES, variableSection));
  }

  // Is there any "XX = YY" coded value line?
  // NOTE(review): this scans the whole variable section, not only the extracted values field;
  // confirm that is intended before narrowing it.
  boolean hasCodedValue = false;
  for (String sectionLine : variableSection) {
    hasCodedValue |= PATTERN_VALUE_LINE_WITH_CODE.matcher(sectionLine).matches();
  }
  if (!hasCodedValue) {
    return null;
  }

  /*
   * From here on the field is known to hold coded values. Walk its lines,
   * accumulating raw lines for the description and the Value in progress, and
   * emitting completed Values/ValueGroups whenever a boundary is crossed.
   */
  List<ValueGroup> completedGroups = new ArrayList<>();
  ValueGroup openGroup = new ValueGroup();
  List<String> pendingDescriptionLines = new ArrayList<>();
  List<String> pendingValueLines = new ArrayList<>();

  for (int lineIndex = 0; lineIndex < valuesLines.size(); lineIndex++) {
    String line = valuesLines.get(lineIndex);

    if (isValueGroupDescription(variableId, valuesLines, lineIndex)) {
      if (!pendingValueLines.isEmpty()) {
        // A description after a Value ends both that Value and its ValueGroup.
        openGroup.getValues().add(parseValue(pendingValueLines));
        pendingValueLines = new ArrayList<>();
        completedGroups.add(openGroup);
        openGroup = new ValueGroup();
      }
      // The description line itself is always kept.
      pendingDescriptionLines.add(line);
      continue;
    }

    // Not a description line, so it belongs to a Value.
    boolean startsNewValue = PATTERN_VALUE_LINE_WITH_CODE.matcher(line).matches();
    if (!pendingDescriptionLines.isEmpty()) {
      // The description that preceded this Value is complete: attach it to the open group.
      openGroup.setDescription(extractParagraphs(pendingDescriptionLines));
      pendingDescriptionLines = new ArrayList<>();
    }
    if (startsNewValue && !pendingValueLines.isEmpty()) {
      // A new coded line closes the previous Value.
      openGroup.getValues().add(parseValue(pendingValueLines));
      pendingValueLines = new ArrayList<>();
    }
    // The line itself is always kept as part of the Value in progress.
    pendingValueLines.add(line);
  }

  if (!pendingValueLines.isEmpty()) {
    // End of input: emit the final Value and the ValueGroup that contains it.
    openGroup.getValues().add(parseValue(pendingValueLines));
    completedGroups.add(openGroup);
  }

  // Sanity check: a trailing description with no Value after it should never happen.
  if (!pendingDescriptionLines.isEmpty()) {
    throw new BadCodeMonkeyException();
  }
  return completedGroups;
}
Aggregations