Search in sources :

Example 1 with RifFileType

use of gov.cms.bfd.model.rif.RifFileType in project beneficiary-fhir-data by CMSgov.

In the class DataSetSubsetter, the method createSubset:

/**
 * Creates a subset of the specified input {@link RifFile}s, writing out the results via the
 * {@link CSVPrinter}s provided by the specified {@link IDataSetWriter}.
 *
 * @param output the {@link IDataSetWriter} to get the needed {@link CSVPrinter}s from
 * @param beneficiaryCount the target beneficiary count of the copy/subset to create
 * @param rifFiles the input {@link RifFile}s to be subsetted
 * @throws IOException Any {@link IOException}s encountered will be bubbled up.
 * @throws IllegalArgumentException if {@code beneficiaryCount} exceeds the number of
 *     beneficiaries found in the input files
 * @throws IllegalStateException if a duplicate beneficiary ID is found in the input files
 */
public static void createSubset(IDataSetWriter output, int beneficiaryCount, List<RifFile> rifFiles) throws IOException {
    LOGGER.info("Scanning beneficiary IDs...");
    List<RifFile> beneficiaryFiles = rifFiles.stream().filter(f -> f.getFileType() == RifFileType.BENEFICIARY).collect(Collectors.toList());
    List<String> beneficiaryIds = new ArrayList<>();
    // Companion Set for O(1) duplicate detection; the previous List.contains(...) scan made
    // this loop O(n^2) over the total beneficiary count.
    Set<String> seenBeneficiaryIds = new HashSet<>();
    for (RifFile beneficiaryFile : beneficiaryFiles) {
        // try-with-resources: previously the parser leaked if a record failed mid-iteration.
        try (CSVParser parser = RifParsingUtils.createCsvParser(beneficiaryFile)) {
            parser.forEach(r -> {
                String beneficiaryId = r.get(BeneficiaryColumn.BENE_ID);
                if (!seenBeneficiaryIds.add(beneficiaryId))
                    throw new IllegalStateException("Duplicate beneficiary ID in input: " + beneficiaryId);
                beneficiaryIds.add(beneficiaryId);
            });
        }
    }
    LOGGER.info("Scanned beneficiary IDs.");
    // Fail fast with a clear message rather than an IndexOutOfBoundsException below.
    if (beneficiaryCount > beneficiaryIds.size())
        throw new IllegalArgumentException(String.format("Requested %d beneficiaries, but only %d were found in the input files.", beneficiaryCount, beneficiaryIds.size()));
    Set<String> selectedBeneficiaryIds = new HashSet<>(beneficiaryCount);
    Collections.shuffle(beneficiaryIds);
    for (int i = 0; i < beneficiaryCount; i++) selectedBeneficiaryIds.add(beneficiaryIds.get(i));
    LOGGER.info("Selected '{}' random beneficiary IDs.", beneficiaryCount);
    // Each RIF file type names its beneficiary-ID column via a different enum; map them so the
    // copy loop below can look up the right column generically.
    Map<RifFileType, Enum<?>> beneficiaryColumnByFileType = new HashMap<>();
    beneficiaryColumnByFileType.put(RifFileType.BENEFICIARY, BeneficiaryColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.CARRIER, CarrierClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.DME, DMEClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.HHA, HHAClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.HOSPICE, HospiceClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.INPATIENT, InpatientClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.OUTPATIENT, OutpatientClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.PDE, PartDEventColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.SNF, SNFClaimColumn.BENE_ID);
    for (RifFile rifFile : rifFiles) {
        LOGGER.info("Subsetting RIF file: '{}'...", rifFile.getDisplayName());
        CSVPrinter rifFilePrinter = output.getPrinter(rifFile.getFileType());
        // try-with-resources: previously this parser was never closed at all.
        try (CSVParser parser = RifParsingUtils.createCsvParser(rifFile)) {
            /*
             * When we created the CSVPrinter, we told it to skip the header.
             * That ensures that we don't write out a header until we've started
             * reading the file and know what it is. Here, we print a "fake"
             * first record with the header, as read from the input file.
             * Previously, we'd been having the CSVPrinter create a header based
             * on our RIF column enums, but that leads to us propagating errors
             * in those enums to the sample files. It's better to let the files
             * tell us what their headers are.
             */
            rifFilePrinter.printRecord(parser.getHeaderMap().entrySet().stream().sorted(Map.Entry.comparingByValue()).map(e -> e.getKey()).toArray());
            parser.forEach(r -> {
                String beneficiaryId = r.get(beneficiaryColumnByFileType.get(rifFile.getFileType()));
                if (selectedBeneficiaryIds.contains(beneficiaryId))
                    try {
                        rifFilePrinter.printRecord(r);
                    } catch (IOException e) {
                        // Re-wrap: forEach's Consumer can't throw the checked IOException.
                        throw new UncheckedIOException(e);
                    }
            });
        }
    }
    LOGGER.info("Subsetted all RIF files.");
}
Also used : Arrays(java.util.Arrays) CarrierClaimColumn(gov.cms.bfd.model.rif.CarrierClaimColumn) RifFileType(gov.cms.bfd.model.rif.RifFileType) S3RifFile(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3RifFile) S3Utilities(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3Utilities) LoggerFactory(org.slf4j.LoggerFactory) SNFClaimColumn(gov.cms.bfd.model.rif.SNFClaimColumn) HHAClaimColumn(gov.cms.bfd.model.rif.HHAClaimColumn) CSVFormat(org.apache.commons.csv.CSVFormat) Map(java.util.Map) CSVParser(org.apache.commons.csv.CSVParser) Path(java.nio.file.Path) TransferManagerBuilder(com.amazonaws.services.s3.transfer.TransferManagerBuilder) InpatientClaimColumn(gov.cms.bfd.model.rif.InpatientClaimColumn) Set(java.util.Set) RifFile(gov.cms.bfd.model.rif.RifFile) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) JAXBException(javax.xml.bind.JAXBException) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) UncheckedJaxbException(gov.cms.bfd.sharedutils.exceptions.UncheckedJaxbException) Entry(java.util.Map.Entry) RifParsingUtils(gov.cms.bfd.model.rif.parse.RifParsingUtils) AmazonClientException(com.amazonaws.AmazonClientException) BeneficiaryColumn(gov.cms.bfd.model.rif.BeneficiaryColumn) CSVPrinter(org.apache.commons.csv.CSVPrinter) TransferManager(com.amazonaws.services.s3.transfer.TransferManager) LocalRifFile(gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile) OutpatientClaimColumn(gov.cms.bfd.model.rif.OutpatientClaimColumn) HospiceClaimColumn(gov.cms.bfd.model.rif.HospiceClaimColumn) Marshaller(javax.xml.bind.Marshaller) HashMap(java.util.HashMap) ExtractionOptions(gov.cms.bfd.pipeline.ccw.rif.extract.ExtractionOptions) Download(com.amazonaws.services.s3.transfer.Download) ArrayList(java.util.ArrayList) DataSetManifestEntry(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry) HashSet(java.util.HashSet) TestDataSetLocation(gov.cms.bfd.model.rif.samples.TestDataSetLocation) AmazonS3(com.amazonaws.services.s3.AmazonS3) 
PartDEventColumn(gov.cms.bfd.model.rif.PartDEventColumn) JAXBContext(javax.xml.bind.JAXBContext) Unmarshaller(javax.xml.bind.Unmarshaller) Logger(org.slf4j.Logger) Files(java.nio.file.Files) FileWriter(java.io.FileWriter) IOException(java.io.IOException) DataSetManifest(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest) Paths(java.nio.file.Paths) DMEClaimColumn(gov.cms.bfd.model.rif.DMEClaimColumn) Collections(java.util.Collections) S3RifFile(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3RifFile) RifFile(gov.cms.bfd.model.rif.RifFile) LocalRifFile(gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) UncheckedIOException(java.io.UncheckedIOException) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) RifFileType(gov.cms.bfd.model.rif.RifFileType) CSVPrinter(org.apache.commons.csv.CSVPrinter) CSVParser(org.apache.commons.csv.CSVParser) HashSet(java.util.HashSet)

Example 2 with RifFileType

use of gov.cms.bfd.model.rif.RifFileType in project beneficiary-fhir-data by CMSgov.

In the class SampleDataColumnsTest, the method logEnumColumns:

/**
 * Intentionally assertion-free: iterates every {@link RifFileType} and logs its column enum's
 * values, for posterity and other uses.
 */
@Test
public void logEnumColumns() {
    for (RifFileType fileType : RifFileType.values()) {
        Enum<?>[] enumColumns = getColumnsInEnum(fileType);
        String columnEnumClassName = enumColumns[0].getDeclaringClass().getName();
        LOGGER.info("Enum columns for '{}': {}", columnEnumClassName, toHeaderFormat(enumColumns, column -> column.name()));
    }
}
Also used : Arrays(java.util.Arrays) CarrierClaimColumn(gov.cms.bfd.model.rif.CarrierClaimColumn) Logger(org.slf4j.Logger) RifFileType(gov.cms.bfd.model.rif.RifFileType) BadCodeMonkeyException(gov.cms.bfd.sharedutils.exceptions.BadCodeMonkeyException) CSVRecord(org.apache.commons.csv.CSVRecord) LoggerFactory(org.slf4j.LoggerFactory) IOException(java.io.IOException) Function(java.util.function.Function) InvocationTargetException(java.lang.reflect.InvocationTargetException) UncheckedIOException(java.io.UncheckedIOException) Test(org.junit.jupiter.api.Test) List(java.util.List) CSVFormat(org.apache.commons.csv.CSVFormat) CSVParser(org.apache.commons.csv.CSVParser) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) RifParsingUtils(gov.cms.bfd.model.rif.parse.RifParsingUtils) BeneficiaryColumn(gov.cms.bfd.model.rif.BeneficiaryColumn) RifFileType(gov.cms.bfd.model.rif.RifFileType) Test(org.junit.jupiter.api.Test)

Example 3 with RifFileType

use of gov.cms.bfd.model.rif.RifFileType in project beneficiary-fhir-data by CMSgov.

In the class RifLoader, the method process:

/**
 * Loads a batch of {@link RifRecordEvent}s into the database within a single transaction.
 *
 * @param recordsBatch the {@link RifRecordEvent}s to process
 * @param loadedFileId the ID of the {@link LoadedFile} associated with this batch
 * @param postgresBatch the {@link PostgreSqlCopyInserter} for the current set of {@link
 *     RifFilesEvent}s being processed
 * @return the {@link RifRecordLoadResult}s that model the results of the operation
 * @throws RifLoadFailure if the batch fails to load (or its rollback fails)
 */
private List<RifRecordLoadResult> process(List<RifRecordEvent<?>> recordsBatch, long loadedFileId, PostgreSqlCopyInserter postgresBatch) {
    RifFileEvent fileEvent = recordsBatch.get(0).getFileEvent();
    MetricRegistry fileEventMetrics = fileEvent.getEventMetrics();
    RifFileType rifFileType = fileEvent.getFile().getFileType();
    // Beneficiary-history records carry raw HICNs/MBIs that must be hashed before persisting.
    if (rifFileType == RifFileType.BENEFICIARY_HISTORY) {
        for (RifRecordEvent<?> rifRecordEvent : recordsBatch) {
            hashBeneficiaryHistoryHicn(rifRecordEvent);
            hashBeneficiaryHistoryMbi(rifRecordEvent);
        }
    }
    // Only one of each failure/success Timer.Contexts will be applied.
    Timer.Context timerBatchSuccess = appState.getMetrics().timer(MetricRegistry.name(getClass().getSimpleName(), "recordBatches")).time();
    Timer.Context timerBatchTypeSuccess = fileEventMetrics.timer(MetricRegistry.name(getClass().getSimpleName(), "recordBatches", rifFileType.name())).time();
    Timer.Context timerBundleFailure = appState.getMetrics().timer(MetricRegistry.name(getClass().getSimpleName(), "recordBatches", "failed")).time();
    EntityManager entityManager = null;
    EntityTransaction txn = null;
    // TODO: refactor the following to be less of an indented mess
    try {
        entityManager = appState.getEntityManagerFactory().createEntityManager();
        txn = entityManager.getTransaction();
        txn.begin();
        List<RifRecordLoadResult> loadResults = new ArrayList<>(recordsBatch.size());
        /*
       * Dev Note: All timestamps of records in the batch and the LoadedBatch must be the same for data consistency.
       * The timestamp from the LoadedBatchBuilder is used.
       */
        LoadedBatchBuilder loadedBatchBuilder = new LoadedBatchBuilder(loadedFileId, recordsBatch.size());
        for (RifRecordEvent<?> rifRecordEvent : recordsBatch) {
            RecordAction recordAction = rifRecordEvent.getRecordAction();
            RifRecordBase record = rifRecordEvent.getRecord();
            LOGGER.trace("Loading '{}' record.", rifFileType);
            // Set lastUpdated to the same value for the whole batch
            record.setLastUpdated(Optional.of(loadedBatchBuilder.getTimestamp()));
            // Associate the beneficiary with this file loaded
            loadedBatchBuilder.associateBeneficiary(rifRecordEvent.getBeneficiaryId());
            LoadStrategy strategy = selectStrategy(recordAction);
            LoadAction loadAction;
            if (strategy == LoadStrategy.INSERT_IDEMPOTENT) {
                // Check to see if record already exists.
                Timer.Context timerIdempotencyQuery = fileEventMetrics.timer(MetricRegistry.name(getClass().getSimpleName(), "idempotencyQueries")).time();
                Object recordId = appState.getEntityManagerFactory().getPersistenceUnitUtil().getIdentifier(record);
                Objects.requireNonNull(recordId);
                Object recordInDb = entityManager.find(record.getClass(), recordId);
                timerIdempotencyQuery.stop();
                // Log if we have a non-2022 enrollment year INSERT
                if (isBackdatedBene(rifRecordEvent)) {
                    Beneficiary bene = (Beneficiary) rifRecordEvent.getRecord();
                    LOGGER.info("Inserted beneficiary with non-2022 enrollment year (beneficiaryId={})", bene.getBeneficiaryId());
                }
                if (recordInDb == null) {
                    loadAction = LoadAction.INSERTED;
                    tweakIfBeneficiary(entityManager, loadedBatchBuilder, rifRecordEvent);
                    entityManager.persist(record);
                // FIXME Object recordInDbAfterUpdate = entityManager.find(record.getClass(), recordId);
                } else {
                    // Record already present; idempotent insert is a no-op.
                    loadAction = LoadAction.DID_NOTHING;
                }
            } else if (strategy == LoadStrategy.INSERT_UPDATE_NON_IDEMPOTENT) {
                if (rifRecordEvent.getRecordAction().equals(RecordAction.INSERT)) {
                    loadAction = LoadAction.INSERTED;
                    // Log if we have a non-2022 enrollment year INSERT
                    if (isBackdatedBene(rifRecordEvent)) {
                        Beneficiary bene = (Beneficiary) rifRecordEvent.getRecord();
                        LOGGER.info("Inserted beneficiary with non-2022 enrollment year (beneficiaryId={})", bene.getBeneficiaryId());
                    }
                    tweakIfBeneficiary(entityManager, loadedBatchBuilder, rifRecordEvent);
                    entityManager.persist(record);
                } else if (rifRecordEvent.getRecordAction().equals(RecordAction.UPDATE)) {
                    loadAction = LoadAction.UPDATED;
                    // Skip this record if the year is not 2022 and it's an update.
                    if (isBackdatedBene(rifRecordEvent)) {
                        /*
               * Serialize the record's CSV data back to actual RIF/CSV, as that's how we'll store
               * it in the DB.
               */
                        // StringBuilder suffices here: no cross-thread sharing, so the
                        // synchronized StringBuffer would just add overhead.
                        StringBuilder rifData = new StringBuilder();
                        try (CSVPrinter csvPrinter = new CSVPrinter(rifData, RifParsingUtils.CSV_FORMAT)) {
                            for (CSVRecord csvRow : rifRecordEvent.getRawCsvRecords()) {
                                csvPrinter.printRecord(csvRow);
                            }
                        }
                        // Save the skipped record to the DB.
                        SkippedRifRecord skippedRifRecord = new SkippedRifRecord(rifRecordEvent.getFileEvent().getParentFilesEvent().getTimestamp(), SkipReasonCode.DELAYED_BACKDATED_ENROLLMENT_BFD_1566, rifRecordEvent.getFileEvent().getFile().getFileType().name(), rifRecordEvent.getRecordAction(), ((Beneficiary) record).getBeneficiaryId(), rifData.toString());
                        entityManager.persist(skippedRifRecord);
                        LOGGER.info("Skipped RIF record, due to '{}'.", skippedRifRecord.getSkipReason());
                    } else {
                        tweakIfBeneficiary(entityManager, loadedBatchBuilder, rifRecordEvent);
                        entityManager.merge(record);
                    }
                } else {
                    throw new BadCodeMonkeyException(String.format("Unhandled %s: '%s'.", RecordAction.class, rifRecordEvent.getRecordAction()));
                }
            } else
                throw new BadCodeMonkeyException();
            LOGGER.trace("Loaded '{}' record.", rifFileType);
            fileEventMetrics.meter(MetricRegistry.name(getClass().getSimpleName(), "records", loadAction.name())).mark(1);
            loadResults.add(new RifRecordLoadResult(rifRecordEvent, loadAction));
        }
        LoadedBatch loadedBatch = loadedBatchBuilder.build();
        entityManager.persist(loadedBatch);
        txn.commit();
        // Update the metrics now that things have been pushed.
        timerBatchSuccess.stop();
        timerBatchTypeSuccess.stop();
        return loadResults;
    } catch (Throwable t) {
        timerBundleFailure.stop();
        fileEventMetrics.meter(MetricRegistry.name(getClass().getSimpleName(), "recordBatches", "failed")).mark(1);
        LOGGER.warn("Failed to load '{}' record.", rifFileType, t);
        throw new RifLoadFailure(recordsBatch, t);
    } finally {
        /*
       * Some errors (e.g. HSQL constraint violations) seem to cause the
       * rollback to fail. Extra error handling is needed here, too, to
       * ensure that the failing data is captured.
       */
        try {
            if (txn != null && txn.isActive())
                txn.rollback();
        } catch (Throwable t) {
            timerBundleFailure.stop();
            fileEventMetrics.meter(MetricRegistry.name(getClass().getSimpleName(), "recordBatches", "failed")).mark(1);
            LOGGER.warn("Failed to load '{}' record.", rifFileType, t);
            throw new RifLoadFailure(recordsBatch, t);
        }
        if (entityManager != null)
            entityManager.close();
    }
}
Also used : RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) ArrayList(java.util.ArrayList) SkippedRifRecord(gov.cms.bfd.model.rif.SkippedRifRecord) RifFileType(gov.cms.bfd.model.rif.RifFileType) LoadedBatchBuilder(gov.cms.bfd.model.rif.LoadedBatchBuilder) CSVPrinter(org.apache.commons.csv.CSVPrinter) LoadAction(gov.cms.bfd.pipeline.ccw.rif.load.RifRecordLoadResult.LoadAction) RifRecordBase(gov.cms.bfd.model.rif.RifRecordBase) EntityTransaction(javax.persistence.EntityTransaction) BadCodeMonkeyException(gov.cms.bfd.sharedutils.exceptions.BadCodeMonkeyException) MetricRegistry(com.codahale.metrics.MetricRegistry) EntityManager(javax.persistence.EntityManager) Timer(com.codahale.metrics.Timer) RecordAction(gov.cms.bfd.model.rif.RecordAction) CSVRecord(org.apache.commons.csv.CSVRecord) Beneficiary(gov.cms.bfd.model.rif.Beneficiary) LoadedBatch(gov.cms.bfd.model.rif.LoadedBatch)

Example 4 with RifFileType

use of gov.cms.bfd.model.rif.RifFileType in project beneficiary-fhir-data by CMSgov.

In the class AppConfiguration, the method readCcwRifLoadOptionsFromEnvironmentVariables:

/**
 * Builds the {@link CcwRifLoadOptions} from environment variables, or returns {@code null} if
 * the CCW RIF load job is disabled.
 *
 * @param loadOptions the already-parsed {@link LoadAppOptions} to bundle into the result
 * @return the {@link CcwRifLoadOptions}, or {@code null} when the job is disabled
 */
@Nullable
static CcwRifLoadOptions readCcwRifLoadOptionsFromEnvironmentVariables(LoadAppOptions loadOptions) {
    final boolean enabled = readEnvBooleanOptional(ENV_VAR_KEY_CCW_RIF_JOB_ENABLED).orElse(true);
    if (!enabled) {
        return null;
    }
    final String s3BucketName = readEnvStringRequired(ENV_VAR_KEY_BUCKET);
    final Optional<String> rifFilterText = readEnvStringOptional(ENV_VAR_KEY_ALLOWED_RIF_TYPE);
    // Map the optional filter text to its enum constant; an unrecognized value is a
    // configuration error, not something to silently ignore.
    final Optional<RifFileType> allowedRifFileType = rifFilterText.map(filterText -> {
        try {
            return RifFileType.valueOf(filterText);
        } catch (IllegalArgumentException e) {
            throw new AppConfigurationException(String.format("Invalid value for configuration environment variable '%s': '%s'", ENV_VAR_KEY_ALLOWED_RIF_TYPE, rifFilterText), e);
        }
    });
    /*
     * Just for convenience: make sure DefaultAWSCredentialsProviderChain
     * has whatever it needs.
     */
    try {
        new DefaultAWSCredentialsProviderChain().getCredentials();
    } catch (AmazonClientException e) {
        /*
       * The credentials provider should throw this if it can't find what
       * it needs.
       */
        throw new AppConfigurationException(String.format("Missing configuration for AWS credentials (for %s).", DefaultAWSCredentialsProviderChain.class.getName()), e);
    }
    return new CcwRifLoadOptions(new ExtractionOptions(s3BucketName, allowedRifFileType), loadOptions);
}
Also used : DefaultAWSCredentialsProviderChain(com.amazonaws.auth.DefaultAWSCredentialsProviderChain) AmazonClientException(com.amazonaws.AmazonClientException) ExtractionOptions(gov.cms.bfd.pipeline.ccw.rif.extract.ExtractionOptions) CcwRifLoadOptions(gov.cms.bfd.pipeline.ccw.rif.CcwRifLoadOptions) RifFileType(gov.cms.bfd.model.rif.RifFileType) Nullable(javax.annotation.Nullable)

Aggregations

RifFileType (gov.cms.bfd.model.rif.RifFileType)4 AmazonClientException (com.amazonaws.AmazonClientException)2 BeneficiaryColumn (gov.cms.bfd.model.rif.BeneficiaryColumn)2 CarrierClaimColumn (gov.cms.bfd.model.rif.CarrierClaimColumn)2 RifParsingUtils (gov.cms.bfd.model.rif.parse.RifParsingUtils)2 ExtractionOptions (gov.cms.bfd.pipeline.ccw.rif.extract.ExtractionOptions)2 BadCodeMonkeyException (gov.cms.bfd.sharedutils.exceptions.BadCodeMonkeyException)2 IOException (java.io.IOException)2 UncheckedIOException (java.io.UncheckedIOException)2 Arrays (java.util.Arrays)2 List (java.util.List)2 CSVFormat (org.apache.commons.csv.CSVFormat)2 CSVParser (org.apache.commons.csv.CSVParser)2 Logger (org.slf4j.Logger)2 LoggerFactory (org.slf4j.LoggerFactory)2 DefaultAWSCredentialsProviderChain (com.amazonaws.auth.DefaultAWSCredentialsProviderChain)1 AmazonS3 (com.amazonaws.services.s3.AmazonS3)1 Download (com.amazonaws.services.s3.transfer.Download)1 TransferManager (com.amazonaws.services.s3.transfer.TransferManager)1 TransferManagerBuilder (com.amazonaws.services.s3.transfer.TransferManagerBuilder)1