Usage of gov.cms.bfd.model.rif.RifFileType in the beneficiary-fhir-data project (CMSgov):
class DataSetSubsetter, method createSubset.
/**
 * Creates a subset of the specified input {@link RifFile}s, writing out the results via the
 * {@link CSVPrinter}s provided by the specified {@link IDataSetWriter}.
 *
 * @param output the {@link IDataSetWriter} to get the needed {@link CSVPrinter}s from
 * @param beneficiaryCount the target beneficiary count of the copy/subset to create
 * @param rifFiles the input {@link RifFile}s to be subsetted
 * @throws IOException Any {@link IOException}s encountered will be bubbled up.
 * @throws IllegalArgumentException if {@code beneficiaryCount} exceeds the number of distinct
 *     beneficiary IDs found in the input files
 * @throws IllegalStateException if a duplicate beneficiary ID is encountered in the input
 */
public static void createSubset(IDataSetWriter output, int beneficiaryCount, List<RifFile> rifFiles) throws IOException {
    LOGGER.info("Scanning beneficiary IDs...");
    List<RifFile> beneficiaryFiles = rifFiles.stream().filter(f -> f.getFileType() == RifFileType.BENEFICIARY).collect(Collectors.toList());
    /*
     * Collect all beneficiary IDs. A Set gives O(1) duplicate detection; the
     * previous List.contains(...) scan was O(n^2) over the whole beneficiary file.
     */
    Set<String> beneficiaryIdSet = new HashSet<>();
    for (RifFile beneficiaryFile : beneficiaryFiles) {
        // try-with-resources: the parser was previously leaked if a record threw.
        try (CSVParser parser = RifParsingUtils.createCsvParser(beneficiaryFile)) {
            parser.forEach(r -> {
                String beneficiaryId = r.get(BeneficiaryColumn.BENE_ID);
                if (!beneficiaryIdSet.add(beneficiaryId))
                    throw new IllegalStateException("Duplicate beneficiary ID: " + beneficiaryId);
            });
        }
    }
    LOGGER.info("Scanned beneficiary IDs.");
    // Fail with a clear message rather than an IndexOutOfBoundsException below.
    if (beneficiaryCount > beneficiaryIdSet.size())
        throw new IllegalArgumentException(String.format(
                "Requested %d beneficiaries, but only %d are available.", beneficiaryCount, beneficiaryIdSet.size()));
    // Shuffle requires random access, so copy into a List first.
    List<String> beneficiaryIds = new ArrayList<>(beneficiaryIdSet);
    Set<String> selectedBeneficiaryIds = new HashSet<>(beneficiaryCount);
    Collections.shuffle(beneficiaryIds);
    for (int i = 0; i < beneficiaryCount; i++) selectedBeneficiaryIds.add(beneficiaryIds.get(i));
    LOGGER.info("Selected '{}' random beneficiary IDs.", beneficiaryCount);
    // Each RIF file type stores its beneficiary ID in a differently-named enum constant.
    Map<RifFileType, Enum<?>> beneficiaryColumnByFileType = new HashMap<>();
    beneficiaryColumnByFileType.put(RifFileType.BENEFICIARY, BeneficiaryColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.CARRIER, CarrierClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.DME, DMEClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.HHA, HHAClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.HOSPICE, HospiceClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.INPATIENT, InpatientClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.OUTPATIENT, OutpatientClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.PDE, PartDEventColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.SNF, SNFClaimColumn.BENE_ID);
    for (RifFile rifFile : rifFiles) {
        LOGGER.info("Subsetting RIF file: '{}'...", rifFile.getDisplayName());
        CSVPrinter rifFilePrinter = output.getPrinter(rifFile.getFileType());
        // try-with-resources: this parser was previously never closed at all.
        try (CSVParser parser = RifParsingUtils.createCsvParser(rifFile)) {
            /*
             * When we created the CSVPrinter, we told it to skip the header.
             * That ensures that we don't write out a header until we've started
             * reading the file and know what it is. Here, we print a "fake"
             * first record with the header, as read from the input file.
             * Previously, we'd been having the CSVPrinter create a header based
             * on our RIF column enums, but that leads to us propagating errors
             * in those enums to the sample files. It's better to let the files
             * tell us what their headers are.
             */
            rifFilePrinter.printRecord(parser.getHeaderMap().entrySet().stream().sorted(Map.Entry.comparingByValue()).map(Map.Entry::getKey).toArray());
            parser.forEach(r -> {
                String beneficiaryId = r.get(beneficiaryColumnByFileType.get(rifFile.getFileType()));
                if (selectedBeneficiaryIds.contains(beneficiaryId))
                    try {
                        rifFilePrinter.printRecord(r);
                    } catch (IOException e) {
                        // CSVParser.forEach takes a Consumer, which can't throw checked IOException.
                        throw new UncheckedIOException(e);
                    }
            });
        }
    }
    LOGGER.info("Subsetted all RIF files.");
}
Usage of gov.cms.bfd.model.rif.RifFileType in the beneficiary-fhir-data project (CMSgov):
class SampleDataColumnsTest, method logEnumColumns.
/**
 * No assertions here: it just logs out the enum columns for posterity and other uses.
 */
@Test
public void logEnumColumns() {
    for (RifFileType fileType : RifFileType.values()) {
        Enum<?>[] columns = getColumnsInEnum(fileType);
        String enumClassName = columns[0].getDeclaringClass().getName();
        LOGGER.info("Enum columns for '{}': {}", enumClassName, toHeaderFormat(columns, Enum::name));
    }
}
Usage of gov.cms.bfd.model.rif.RifFileType in the beneficiary-fhir-data project (CMSgov):
class RifLoader, method process.
/**
 * Processes a single batch of RIF records: hashes sensitive beneficiary-history fields where
 * needed, then persists the whole batch to the database inside one transaction, recording
 * per-batch metrics and a {@link LoadedBatch} row.
 *
 * @param recordsBatch the {@link RifRecordEvent}s to process
 * @param loadedFileId the ID of the {@link LoadedFile} associated with this batch
 * @param postgresBatch the {@link PostgreSqlCopyInserter} for the current set of {@link
 * RifFilesEvent}s being processed
 * @return the {@link RifRecordLoadResult}s that model the results of the operation
 */
private List<RifRecordLoadResult> process(List<RifRecordEvent<?>> recordsBatch, long loadedFileId, PostgreSqlCopyInserter postgresBatch) {
// File-level metadata is taken from the first record; assumes every record in the batch
// shares the same file event — TODO confirm against upstream batching.
RifFileEvent fileEvent = recordsBatch.get(0).getFileEvent();
MetricRegistry fileEventMetrics = fileEvent.getEventMetrics();
RifFileType rifFileType = fileEvent.getFile().getFileType();
// Beneficiary-history records have their HICN and MBI fields hashed before persistence.
if (rifFileType == RifFileType.BENEFICIARY_HISTORY) {
for (RifRecordEvent<?> rifRecordEvent : recordsBatch) {
hashBeneficiaryHistoryHicn(rifRecordEvent);
hashBeneficiaryHistoryMbi(rifRecordEvent);
}
}
// Only one of each failure/success Timer.Contexts will be applied.
Timer.Context timerBatchSuccess = appState.getMetrics().timer(MetricRegistry.name(getClass().getSimpleName(), "recordBatches")).time();
Timer.Context timerBatchTypeSuccess = fileEventMetrics.timer(MetricRegistry.name(getClass().getSimpleName(), "recordBatches", rifFileType.name())).time();
Timer.Context timerBundleFailure = appState.getMetrics().timer(MetricRegistry.name(getClass().getSimpleName(), "recordBatches", "failed")).time();
EntityManager entityManager = null;
EntityTransaction txn = null;
// TODO: refactor the following to be less of an indented mess
try {
entityManager = appState.getEntityManagerFactory().createEntityManager();
txn = entityManager.getTransaction();
txn.begin();
List<RifRecordLoadResult> loadResults = new ArrayList<>(recordsBatch.size());
/*
 * Dev Note: All timestamps of records in the batch and the LoadedBatch must be the same for data consistency.
 * The timestamp from the LoadedBatchBuilder is used.
 */
LoadedBatchBuilder loadedBatchBuilder = new LoadedBatchBuilder(loadedFileId, recordsBatch.size());
for (RifRecordEvent<?> rifRecordEvent : recordsBatch) {
RecordAction recordAction = rifRecordEvent.getRecordAction();
RifRecordBase record = rifRecordEvent.getRecord();
LOGGER.trace("Loading '{}' record.", rifFileType);
// Set lastUpdated to the same value for the whole batch
record.setLastUpdated(Optional.of(loadedBatchBuilder.getTimestamp()));
// Associate the beneficiary with this file loaded
loadedBatchBuilder.associateBeneficiary(rifRecordEvent.getBeneficiaryId());
LoadStrategy strategy = selectStrategy(recordAction);
LoadAction loadAction;
if (strategy == LoadStrategy.INSERT_IDEMPOTENT) {
// Check to see if record already exists.
Timer.Context timerIdempotencyQuery = fileEventMetrics.timer(MetricRegistry.name(getClass().getSimpleName(), "idempotencyQueries")).time();
Object recordId = appState.getEntityManagerFactory().getPersistenceUnitUtil().getIdentifier(record);
Objects.requireNonNull(recordId);
Object recordInDb = entityManager.find(record.getClass(), recordId);
// Timer.Context.close() stops the timer, same as stop().
timerIdempotencyQuery.close();
// Log if we have a non-2022 enrollment year INSERT
// NOTE(review): this logs "Inserted" even when recordInDb != null and nothing is
// actually inserted (DID_NOTHING below) — confirm whether that's intended.
if (isBackdatedBene(rifRecordEvent)) {
Beneficiary bene = (Beneficiary) rifRecordEvent.getRecord();
LOGGER.info("Inserted beneficiary with non-2022 enrollment year (beneficiaryId={})", bene.getBeneficiaryId());
}
if (recordInDb == null) {
loadAction = LoadAction.INSERTED;
tweakIfBeneficiary(entityManager, loadedBatchBuilder, rifRecordEvent);
entityManager.persist(record);
// FIXME Object recordInDbAfterUpdate = entityManager.find(record.getClass(), recordId);
} else {
// Record already present: idempotent insert is a no-op.
loadAction = LoadAction.DID_NOTHING;
}
} else if (strategy == LoadStrategy.INSERT_UPDATE_NON_IDEMPOTENT) {
if (rifRecordEvent.getRecordAction().equals(RecordAction.INSERT)) {
loadAction = LoadAction.INSERTED;
// Log if we have a non-2022 enrollment year INSERT
if (isBackdatedBene(rifRecordEvent)) {
Beneficiary bene = (Beneficiary) rifRecordEvent.getRecord();
LOGGER.info("Inserted beneficiary with non-2022 enrollment year (beneficiaryId={})", bene.getBeneficiaryId());
}
tweakIfBeneficiary(entityManager, loadedBatchBuilder, rifRecordEvent);
entityManager.persist(record);
} else if (rifRecordEvent.getRecordAction().equals(RecordAction.UPDATE)) {
loadAction = LoadAction.UPDATED;
// Skip this record if the year is not 2022 and its an update.
// NOTE(review): the skipped record is still reported as UPDATED in the load
// results — confirm whether a separate LoadAction would be more accurate.
if (isBackdatedBene(rifRecordEvent)) {
/*
 * Serialize the record's CSV data back to actual RIF/CSV, as that's how we'll store
 * it in the DB.
 */
StringBuffer rifData = new StringBuffer();
try (CSVPrinter csvPrinter = new CSVPrinter(rifData, RifParsingUtils.CSV_FORMAT)) {
for (CSVRecord csvRow : rifRecordEvent.getRawCsvRecords()) {
csvPrinter.printRecord(csvRow);
}
}
// Save the skipped record to the DB.
SkippedRifRecord skippedRifRecord = new SkippedRifRecord(rifRecordEvent.getFileEvent().getParentFilesEvent().getTimestamp(), SkipReasonCode.DELAYED_BACKDATED_ENROLLMENT_BFD_1566, rifRecordEvent.getFileEvent().getFile().getFileType().name(), rifRecordEvent.getRecordAction(), ((Beneficiary) record).getBeneficiaryId(), rifData.toString());
entityManager.persist(skippedRifRecord);
LOGGER.info("Skipped RIF record, due to '{}'.", skippedRifRecord.getSkipReason());
} else {
tweakIfBeneficiary(entityManager, loadedBatchBuilder, rifRecordEvent);
entityManager.merge(record);
}
} else {
throw new BadCodeMonkeyException(String.format("Unhandled %s: '%s'.", RecordAction.class, rifRecordEvent.getRecordAction()));
}
} else
throw new BadCodeMonkeyException();
LOGGER.trace("Loaded '{}' record.", rifFileType);
fileEventMetrics.meter(MetricRegistry.name(getClass().getSimpleName(), "records", loadAction.name())).mark(1);
loadResults.add(new RifRecordLoadResult(rifRecordEvent, loadAction));
}
LoadedBatch loadedBatch = loadedBatchBuilder.build();
entityManager.persist(loadedBatch);
txn.commit();
// Update the metrics now that things have been pushed.
timerBatchSuccess.stop();
timerBatchTypeSuccess.stop();
return loadResults;
} catch (Throwable t) {
timerBundleFailure.stop();
fileEventMetrics.meter(MetricRegistry.name(getClass().getSimpleName(), "recordBatches", "failed")).mark(1);
LOGGER.warn("Failed to load '{}' record.", rifFileType, t);
throw new RifLoadFailure(recordsBatch, t);
} finally {
/*
 * Some errors (e.g. HSQL constraint violations) seem to cause the
 * rollback to fail. Extra error handling is needed here, too, to
 * ensure that the failing data is captured.
 */
try {
if (txn != null && txn.isActive())
txn.rollback();
} catch (Throwable t) {
timerBundleFailure.stop();
fileEventMetrics.meter(MetricRegistry.name(getClass().getSimpleName(), "recordBatches", "failed")).mark(1);
LOGGER.warn("Failed to load '{}' record.", rifFileType, t);
throw new RifLoadFailure(recordsBatch, t);
}
if (entityManager != null)
entityManager.close();
}
}
Usage of gov.cms.bfd.model.rif.RifFileType in the beneficiary-fhir-data project (CMSgov):
class AppConfiguration, method readCcwRifLoadOptionsFromEnvironmentVariables.
/**
 * Builds the {@link CcwRifLoadOptions} for the CCW RIF load job from environment variables.
 *
 * @param loadOptions the {@link LoadAppOptions} to bundle into the result
 * @return the configured {@link CcwRifLoadOptions}, or {@code null} if the job is disabled
 */
@Nullable
static CcwRifLoadOptions readCcwRifLoadOptionsFromEnvironmentVariables(LoadAppOptions loadOptions) {
    // The job defaults to enabled; an explicit "false" disables it entirely.
    if (!readEnvBooleanOptional(ENV_VAR_KEY_CCW_RIF_JOB_ENABLED).orElse(true)) {
        return null;
    }
    final String bucket = readEnvStringRequired(ENV_VAR_KEY_BUCKET);
    // Optionally restrict the job to a single RIF file type, named by its enum constant.
    final Optional<String> rifFilterText = readEnvStringOptional(ENV_VAR_KEY_ALLOWED_RIF_TYPE);
    Optional<RifFileType> allowedRifFileType = Optional.empty();
    if (rifFilterText.isPresent()) {
        try {
            allowedRifFileType = Optional.of(RifFileType.valueOf(rifFilterText.get()));
        } catch (IllegalArgumentException e) {
            throw new AppConfigurationException(String.format("Invalid value for configuration environment variable '%s': '%s'", ENV_VAR_KEY_ALLOWED_RIF_TYPE, rifFilterText), e);
        }
    }
    /*
     * Just for convenience: make sure DefaultAWSCredentialsProviderChain
     * has whatever it needs.
     */
    try {
        new DefaultAWSCredentialsProviderChain().getCredentials();
    } catch (AmazonClientException e) {
        /*
         * The credentials provider should throw this if it can't find what
         * it needs.
         */
        throw new AppConfigurationException(String.format("Missing configuration for AWS credentials (for %s).", DefaultAWSCredentialsProviderChain.class.getName()), e);
    }
    ExtractionOptions extractionOptions = new ExtractionOptions(bucket, allowedRifFileType);
    return new CcwRifLoadOptions(extractionOptions, loadOptions);
}
Aggregations