Example 1 with LoadedBatch

Use of gov.cms.bfd.model.rif.LoadedBatch in project beneficiary-fhir-data by CMSgov.

From class LoadedFilterManager, method buildFilter.

/**
 * Build a filter for this loaded file. Should be a pure function.
 *
 * @param fileId the id of the loaded file to build a filter for
 * @param firstUpdated the timestamp to record as the filter's firstUpdated; also the lower bound for lastUpdated
 * @param fetchById a function that returns the list of batches for a given file id
 * @return a new filter
 */
public static LoadedFileFilter buildFilter(long fileId, Instant firstUpdated, Function<Long, List<LoadedBatch>> fetchById) {
    final List<LoadedBatch> loadedBatches = fetchById.apply(fileId);
    final int batchCount = loadedBatches.size();
    if (batchCount == 0) {
        throw new IllegalArgumentException("Batches cannot be empty for a filter");
    }
    final int batchSize = (loadedBatches.get(0).getBeneficiaries().length() + BENE_ID_SIZE) / BENE_ID_SIZE;
    // It is important to get a good estimate of the number of entries for
    // an accurate FPP (false positive probability) and minimal memory size. This estimate assumes that all batches are of equal size.
    final BloomFilter bloomFilter = LoadedFileFilter.createFilter(batchSize * batchCount);
    // Loop through all batches, filling the bloom filter and finding the lastUpdated
    Instant lastUpdated = firstUpdated;
    for (LoadedBatch batch : loadedBatches) {
        for (String beneficiary : batch.getBeneficiariesAsList()) {
            bloomFilter.putString(beneficiary);
        }
        if (batch.getCreated().isAfter(lastUpdated)) {
            lastUpdated = batch.getCreated();
        }
    }
    LOGGER.info("Built a filter for {} with {} batches", fileId, loadedBatches.size());
    return new LoadedFileFilter(fileId, batchCount, firstUpdated, lastUpdated, bloomFilter);
}
Also used : Instant(java.time.Instant) BloomFilter(org.apache.spark.util.sketch.BloomFilter) LoadedBatch(gov.cms.bfd.model.rif.LoadedBatch)
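
For orientation, the sketch below (not from the project) shows one way buildFilter might be invoked. The fetchById argument is just a function from a file id to its batches, so a caller can pass a lambda that queries the database or, as here, returns an in-memory list. The LoadedBatch constructor arguments follow the (batchId, loadedFileId, beneficiaries, created) shape used in LoadedFilterManagerTest below; the exact parameter types and the sample values are assumptions.

// Usage sketch (hypothetical caller, not project code).
// Assumes LoadedBatch(long batchId, long loadedFileId, List<String> beneficiaries, Instant created),
// matching the constructor call seen in LoadedFilterManagerTest.
long fileId = 42L;
Instant firstUpdated = Instant.now();
List<LoadedBatch> batches =
    List.of(new LoadedBatch(1L, fileId, List.of("12345"), firstUpdated.plusSeconds(10)));
LoadedFileFilter filter = LoadedFilterManager.buildFilter(fileId, firstUpdated, id -> batches);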

Example 2 with LoadedBatch

Use of gov.cms.bfd.model.rif.LoadedBatch in project beneficiary-fhir-data by CMSgov.

From class RifLoaderIT, method loadBatches.

/**
 * Load the batches associated with a particular file
 *
 * @param entityManager to use
 * @param loadedFileId to use
 * @return the list of {@link LoadedBatch} entities for the given file
 */
private List<LoadedBatch> loadBatches(EntityManager entityManager, long loadedFileId) {
    CriteriaBuilder cb = entityManager.getCriteriaBuilder();
    CriteriaQuery<LoadedBatch> fetch = cb.createQuery(LoadedBatch.class);
    Root<LoadedBatch> b = fetch.from(LoadedBatch.class);
    fetch.where(cb.equal(b.get("loadedFileId"), loadedFileId));
    return entityManager.createQuery(fetch).getResultList();
}
Also used : CriteriaBuilder(javax.persistence.criteria.CriteriaBuilder) LoadedBatch(gov.cms.bfd.model.rif.LoadedBatch)
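
The same lookup can also be written as JPQL; the sketch below is not from the project and assumes the LoadedBatch entity maps a loadedFileId attribute, as the criteria query above implies.

// Hypothetical JPQL equivalent of the criteria query in loadBatches.
List<LoadedBatch> batches = entityManager
    .createQuery("SELECT b FROM LoadedBatch b WHERE b.loadedFileId = :fileId", LoadedBatch.class)
    .setParameter("fileId", loadedFileId)
    .getResultList();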

Example 3 with LoadedBatch

Use of gov.cms.bfd.model.rif.LoadedBatch in project beneficiary-fhir-data by CMSgov.

From class LoadedFilterManagerTest, method beforeAll.

@BeforeAll
public static void beforeAll() {
    // Create a few time stamps to play with
    Instant now = Instant.now().truncatedTo(ChronoUnit.DAYS);
    for (int i = 0; i < preDates.length; i++) {
        preDates[i] = now.plusSeconds(i);
    }
    List<String> beneficiaries = Collections.singletonList(SAMPLE_BENE);
    for (int i = 0; i < preBatches.length; i++) {
        preBatches[i] = new LoadedBatch(i + 1, (i / 2) + 1, beneficiaries, preDates[i * 5 + 4]);
    }
}
Also used : Instant(java.time.Instant) LoadedBatch(gov.cms.bfd.model.rif.LoadedBatch) BeforeAll(org.junit.jupiter.api.BeforeAll)
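
The preDates and preBatches arrays are fixtures declared elsewhere in LoadedFilterManagerTest; the index expression preDates[i * 5 + 4] implies preDates.length must be at least 5 * preBatches.length. Purely to illustrate that constraint, a hypothetical set of declarations consistent with the setup above could look like this (the names exist in the test, but the sizes and sample value are assumptions):

// Hypothetical fixture declarations; sizes chosen only so that i * 5 + 4 stays within preDates.
private static final String SAMPLE_BENE = "1234567890"; // illustrative value
private static final Instant[] preDates = new Instant[10];
private static final LoadedBatch[] preBatches = new LoadedBatch[2];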

Example 4 with LoadedBatch

Use of gov.cms.bfd.model.rif.LoadedBatch in project beneficiary-fhir-data by CMSgov.

From class LoadedFilterManager, method fetchLoadedTuples.

/**
 * Fetch tuples of (loadedFileId, LoadedFile.created, max(LoadedBatch.created)).
 *
 * @param after limits the query to batches created after this timestamp; if null, no time restriction is applied
 * @return tuples that meet the after criteria, or an empty list
 */
private List<LoadedTuple> fetchLoadedTuples(Instant after) {
    final CriteriaBuilder cb = entityManager.getCriteriaBuilder();
    CriteriaQuery<LoadedTuple> query = cb.createQuery(LoadedTuple.class);
    final Root<LoadedFile> f = query.from(LoadedFile.class);
    Join<LoadedFile, LoadedBatch> b = f.join("batches");
    query = query.select(cb.construct(LoadedTuple.class, f.get("loadedFileId"), f.get("created"), cb.max(b.get("created"))));
    if (after != null) {
        query = query.where(cb.greaterThan(b.get("created"), after));
    }
    query = query.groupBy(f.get("loadedFileId"), f.get("created")).orderBy(cb.desc(f.get("created")));
    return entityManager.createQuery(query).getResultList();
}
Also used : CriteriaBuilder(javax.persistence.criteria.CriteriaBuilder) LoadedFile(gov.cms.bfd.model.rif.LoadedFile) LoadedBatch(gov.cms.bfd.model.rif.LoadedBatch)
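
For readability, here is a hedged JPQL rendering of the same query (not from the project), covering the case where after is non-null. The constructor expression assumes LoadedTuple has a (loadedFileId, created, lastUpdated) constructor matching the cb.construct(...) call above; LoadedTuple.class.getName() is used to avoid hard-coding a package that is not shown here.

// Hypothetical JPQL equivalent of the criteria query in fetchLoadedTuples (non-null after only).
TypedQuery<LoadedTuple> tupleQuery = entityManager.createQuery(
    "SELECT NEW " + LoadedTuple.class.getName() + "(f.loadedFileId, f.created, MAX(b.created)) "
        + "FROM LoadedFile f JOIN f.batches b "
        + "WHERE b.created > :after "
        + "GROUP BY f.loadedFileId, f.created "
        + "ORDER BY f.created DESC",
    LoadedTuple.class);
tupleQuery.setParameter("after", after);
List<LoadedTuple> tuples = tupleQuery.getResultList();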

Example 5 with LoadedBatch

Use of gov.cms.bfd.model.rif.LoadedBatch in project beneficiary-fhir-data by CMSgov.

From class RifLoader, method process.

/**
 * @param recordsBatch the {@link RifRecordEvent}s to process
 * @param loadedFileId the id of the {@link LoadedFile} associated with this batch
 * @param postgresBatch the {@link PostgreSqlCopyInserter} for the current set of {@link
 *     RifFilesEvent}s being processed
 * @return the {@link RifRecordLoadResult}s that model the results of the operation
 */
private List<RifRecordLoadResult> process(List<RifRecordEvent<?>> recordsBatch, long loadedFileId, PostgreSqlCopyInserter postgresBatch) {
    RifFileEvent fileEvent = recordsBatch.get(0).getFileEvent();
    MetricRegistry fileEventMetrics = fileEvent.getEventMetrics();
    RifFileType rifFileType = fileEvent.getFile().getFileType();
    if (rifFileType == RifFileType.BENEFICIARY_HISTORY) {
        for (RifRecordEvent<?> rifRecordEvent : recordsBatch) {
            hashBeneficiaryHistoryHicn(rifRecordEvent);
            hashBeneficiaryHistoryMbi(rifRecordEvent);
        }
    }
    // Only the success or the failure Timer.Contexts will be stopped, depending on how the batch turns out.
    Timer.Context timerBatchSuccess = appState.getMetrics().timer(MetricRegistry.name(getClass().getSimpleName(), "recordBatches")).time();
    Timer.Context timerBatchTypeSuccess = fileEventMetrics.timer(MetricRegistry.name(getClass().getSimpleName(), "recordBatches", rifFileType.name())).time();
    Timer.Context timerBundleFailure = appState.getMetrics().timer(MetricRegistry.name(getClass().getSimpleName(), "recordBatches", "failed")).time();
    EntityManager entityManager = null;
    EntityTransaction txn = null;
    // TODO: refactor the following to be less of an indented mess
    try {
        entityManager = appState.getEntityManagerFactory().createEntityManager();
        txn = entityManager.getTransaction();
        txn.begin();
        List<RifRecordLoadResult> loadResults = new ArrayList<>(recordsBatch.size());
        /*
       * Dev Note: All timestamps of records in the batch and the LoadedBatch must be the same for data consistency.
       * The timestamp from the LoadedBatchBuilder is used.
       */
        LoadedBatchBuilder loadedBatchBuilder = new LoadedBatchBuilder(loadedFileId, recordsBatch.size());
        for (RifRecordEvent<?> rifRecordEvent : recordsBatch) {
            RecordAction recordAction = rifRecordEvent.getRecordAction();
            RifRecordBase record = rifRecordEvent.getRecord();
            LOGGER.trace("Loading '{}' record.", rifFileType);
            // Set lastUpdated to the same value for the whole batch
            record.setLastUpdated(Optional.of(loadedBatchBuilder.getTimestamp()));
            // Associate the beneficiary with this file loaded
            loadedBatchBuilder.associateBeneficiary(rifRecordEvent.getBeneficiaryId());
            LoadStrategy strategy = selectStrategy(recordAction);
            LoadAction loadAction;
            if (strategy == LoadStrategy.INSERT_IDEMPOTENT) {
                // Check to see if record already exists.
                Timer.Context timerIdempotencyQuery = fileEventMetrics.timer(MetricRegistry.name(getClass().getSimpleName(), "idempotencyQueries")).time();
                Object recordId = appState.getEntityManagerFactory().getPersistenceUnitUtil().getIdentifier(record);
                Objects.requireNonNull(recordId);
                Object recordInDb = entityManager.find(record.getClass(), recordId);
                timerIdempotencyQuery.close();
                // Log if we have a non-2022 enrollment year INSERT
                if (isBackdatedBene(rifRecordEvent)) {
                    Beneficiary bene = (Beneficiary) rifRecordEvent.getRecord();
                    LOGGER.info("Inserted beneficiary with non-2022 enrollment year (beneficiaryId={})", bene.getBeneficiaryId());
                }
                if (recordInDb == null) {
                    loadAction = LoadAction.INSERTED;
                    tweakIfBeneficiary(entityManager, loadedBatchBuilder, rifRecordEvent);
                    entityManager.persist(record);
                // FIXME Object recordInDbAfterUpdate = entityManager.find(record.getClass(), recordId);
                } else {
                    loadAction = LoadAction.DID_NOTHING;
                }
            } else if (strategy == LoadStrategy.INSERT_UPDATE_NON_IDEMPOTENT) {
                if (rifRecordEvent.getRecordAction().equals(RecordAction.INSERT)) {
                    loadAction = LoadAction.INSERTED;
                    // Log if we have a non-2022 enrollment year INSERT
                    if (isBackdatedBene(rifRecordEvent)) {
                        Beneficiary bene = (Beneficiary) rifRecordEvent.getRecord();
                        LOGGER.info("Inserted beneficiary with non-2022 enrollment year (beneficiaryId={})", bene.getBeneficiaryId());
                    }
                    tweakIfBeneficiary(entityManager, loadedBatchBuilder, rifRecordEvent);
                    entityManager.persist(record);
                } else if (rifRecordEvent.getRecordAction().equals(RecordAction.UPDATE)) {
                    loadAction = LoadAction.UPDATED;
                    // Skip this record if the enrollment year is not 2022 and it's an update.
                    if (isBackdatedBene(rifRecordEvent)) {
                        /*
               * Serialize the record's CSV data back to actual RIF/CSV, as that's how we'll store
               * it in the DB.
               */
                        StringBuffer rifData = new StringBuffer();
                        try (CSVPrinter csvPrinter = new CSVPrinter(rifData, RifParsingUtils.CSV_FORMAT)) {
                            for (CSVRecord csvRow : rifRecordEvent.getRawCsvRecords()) {
                                csvPrinter.printRecord(csvRow);
                            }
                        }
                        // Save the skipped record to the DB.
                        SkippedRifRecord skippedRifRecord = new SkippedRifRecord(rifRecordEvent.getFileEvent().getParentFilesEvent().getTimestamp(), SkipReasonCode.DELAYED_BACKDATED_ENROLLMENT_BFD_1566, rifRecordEvent.getFileEvent().getFile().getFileType().name(), rifRecordEvent.getRecordAction(), ((Beneficiary) record).getBeneficiaryId(), rifData.toString());
                        entityManager.persist(skippedRifRecord);
                        LOGGER.info("Skipped RIF record, due to '{}'.", skippedRifRecord.getSkipReason());
                    } else {
                        tweakIfBeneficiary(entityManager, loadedBatchBuilder, rifRecordEvent);
                        entityManager.merge(record);
                    }
                } else {
                    throw new BadCodeMonkeyException(String.format("Unhandled %s: '%s'.", RecordAction.class, rifRecordEvent.getRecordAction()));
                }
            } else
                throw new BadCodeMonkeyException();
            LOGGER.trace("Loaded '{}' record.", rifFileType);
            fileEventMetrics.meter(MetricRegistry.name(getClass().getSimpleName(), "records", loadAction.name())).mark(1);
            loadResults.add(new RifRecordLoadResult(rifRecordEvent, loadAction));
        }
        LoadedBatch loadedBatch = loadedBatchBuilder.build();
        entityManager.persist(loadedBatch);
        txn.commit();
        // Update the metrics now that things have been pushed.
        timerBatchSuccess.stop();
        timerBatchTypeSuccess.stop();
        return loadResults;
    } catch (Throwable t) {
        timerBundleFailure.stop();
        fileEventMetrics.meter(MetricRegistry.name(getClass().getSimpleName(), "recordBatches", "failed")).mark(1);
        LOGGER.warn("Failed to load '{}' record.", rifFileType, t);
        throw new RifLoadFailure(recordsBatch, t);
    } finally {
        /*
       * Some errors (e.g. HSQL constraint violations) seem to cause the
       * rollback to fail. Extra error handling is needed here, too, to
       * ensure that the failing data is captured.
       */
        try {
            if (txn != null && txn.isActive())
                txn.rollback();
        } catch (Throwable t) {
            timerBundleFailure.stop();
            fileEventMetrics.meter(MetricRegistry.name(getClass().getSimpleName(), "recordBatches", "failed")).mark(1);
            LOGGER.warn("Failed to load '{}' record.", rifFileType, t);
            throw new RifLoadFailure(recordsBatch, t);
        }
        if (entityManager != null)
            entityManager.close();
    }
}
Also used : RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) ArrayList(java.util.ArrayList) SkippedRifRecord(gov.cms.bfd.model.rif.SkippedRifRecord) RifFileType(gov.cms.bfd.model.rif.RifFileType) LoadedBatchBuilder(gov.cms.bfd.model.rif.LoadedBatchBuilder) CSVPrinter(org.apache.commons.csv.CSVPrinter) LoadAction(gov.cms.bfd.pipeline.ccw.rif.load.RifRecordLoadResult.LoadAction) RifRecordBase(gov.cms.bfd.model.rif.RifRecordBase) EntityTransaction(javax.persistence.EntityTransaction) BadCodeMonkeyException(gov.cms.bfd.sharedutils.exceptions.BadCodeMonkeyException) MetricRegistry(com.codahale.metrics.MetricRegistry) EntityManager(javax.persistence.EntityManager) Timer(com.codahale.metrics.Timer) RecordAction(gov.cms.bfd.model.rif.RecordAction) CSVRecord(org.apache.commons.csv.CSVRecord) Beneficiary(gov.cms.bfd.model.rif.Beneficiary) LoadedBatch(gov.cms.bfd.model.rif.LoadedBatch)
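
The method is long, but its LoadedBatch bookkeeping reduces to a small pattern. The sketch below is a distillation, not project code: it uses only the LoadedBatchBuilder calls that appear in process() above and shows the insert path only; updates, skips, and error handling stay in the real method.

// Minimal sketch of the per-batch bookkeeping in process(): one LoadedBatchBuilder per batch,
// one shared timestamp for every record, and a single LoadedBatch row persisted at the end.
LoadedBatchBuilder loadedBatchBuilder = new LoadedBatchBuilder(loadedFileId, recordsBatch.size());
for (RifRecordEvent<?> rifRecordEvent : recordsBatch) {
    RifRecordBase record = rifRecordEvent.getRecord();
    record.setLastUpdated(Optional.of(loadedBatchBuilder.getTimestamp()));
    loadedBatchBuilder.associateBeneficiary(rifRecordEvent.getBeneficiaryId());
    entityManager.persist(record);
}
entityManager.persist(loadedBatchBuilder.build());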

Aggregations

LoadedBatch (gov.cms.bfd.model.rif.LoadedBatch): 7
LoadedFile (gov.cms.bfd.model.rif.LoadedFile): 3
Instant (java.time.Instant): 2
CriteriaBuilder (javax.persistence.criteria.CriteriaBuilder): 2
Test (org.junit.jupiter.api.Test): 2
MetricRegistry (com.codahale.metrics.MetricRegistry): 1
Timer (com.codahale.metrics.Timer): 1
Beneficiary (gov.cms.bfd.model.rif.Beneficiary): 1
LoadedBatchBuilder (gov.cms.bfd.model.rif.LoadedBatchBuilder): 1
RecordAction (gov.cms.bfd.model.rif.RecordAction): 1
RifFileEvent (gov.cms.bfd.model.rif.RifFileEvent): 1
RifFileType (gov.cms.bfd.model.rif.RifFileType): 1
RifRecordBase (gov.cms.bfd.model.rif.RifRecordBase): 1
SkippedRifRecord (gov.cms.bfd.model.rif.SkippedRifRecord): 1
LoadAction (gov.cms.bfd.pipeline.ccw.rif.load.RifRecordLoadResult.LoadAction): 1
BadCodeMonkeyException (gov.cms.bfd.sharedutils.exceptions.BadCodeMonkeyException): 1
ArrayList (java.util.ArrayList): 1
EntityManager (javax.persistence.EntityManager): 1
EntityTransaction (javax.persistence.EntityTransaction): 1
CSVPrinter (org.apache.commons.csv.CSVPrinter): 1