Search in sources :

Example 1 with RifFilesEvent

use of gov.cms.bfd.model.rif.RifFilesEvent in project beneficiary-fhir-data by CMSgov.

the class RifFilesProcessorTest method processBeneficiaryHistoryRecord_SAMPLE_A.

/**
 * Ensures that {@link RifFilesProcessor} can correctly handle {@link
 * StaticRifResource#SAMPLE_A_BENEFICIARY_HISTORY}.
 *
 * <p>The first parsed record is verified field by field. The second and third records are then
 * each verified against one shared set of expected values, since they are expected to be exact
 * duplicates of each other.
 */
@Test
public void processBeneficiaryHistoryRecord_SAMPLE_A() {
    RifFilesEvent filesEvent = new RifFilesEvent(Instant.now(), StaticRifResource.SAMPLE_A_BENEFICIARY_HISTORY.toRifFile());
    RifFilesProcessor processor = new RifFilesProcessor();
    RifFileRecords rifFileRecords = processor.produceRecords(filesEvent.getFileEvents().get(0));
    List<RifRecordEvent<?>> rifEventsList = rifFileRecords.getRecords().collect(Collectors.toList());
    assertEquals(StaticRifResource.SAMPLE_A_BENEFICIARY_HISTORY.getRecordCount(), rifEventsList.size());

    // Verify the first record in detail.
    RifRecordEvent<?> rifRecordEvent0 = rifEventsList.get(0);
    assertEquals(StaticRifResource.SAMPLE_A_BENEFICIARY_HISTORY.getRifFileType(), rifRecordEvent0.getFileEvent().getFile().getFileType());
    assertNotNull(rifRecordEvent0.getRecord());
    assertTrue(rifRecordEvent0.getRecord() instanceof BeneficiaryHistory);
    BeneficiaryHistory beneficiaryHistory0 = (BeneficiaryHistory) rifRecordEvent0.getRecord();
    assertEquals(beneficiaryHistory0.getBeneficiaryId(), rifRecordEvent0.getBeneficiaryId());
    assertEquals(RecordAction.INSERT, rifRecordEvent0.getRecordAction());
    assertEquals("567834", beneficiaryHistory0.getBeneficiaryId());
    assertEquals(LocalDate.of(1979, Month.MARCH, 17), beneficiaryHistory0.getBirthDate());
    assertEquals(('2'), beneficiaryHistory0.getSex());
    assertEquals("543217066Z", beneficiaryHistory0.getHicn());
    assertEquals(Optional.of("3456689"), beneficiaryHistory0.getMedicareBeneficiaryId());
    assertEquals(LocalDate.of(1990, Month.MARCH, 17), beneficiaryHistory0.getMbiEffectiveDate().get());
    assertEquals(LocalDate.of(1995, Month.MARCH, 17), beneficiaryHistory0.getMbiObsoleteDate().get());

    /*
     * We should expect and be able to cope with BENEFICIARY_HISTORY records that
     * are exact duplicates.
     */
    for (RifRecordEvent<?> rifRecordEvent : rifEventsList.subList(1, 3)) {
        assertEquals(StaticRifResource.SAMPLE_A_BENEFICIARY_HISTORY.getRifFileType(), rifRecordEvent.getFileEvent().getFile().getFileType());
        assertNotNull(rifRecordEvent.getRecord());
        assertTrue(rifRecordEvent.getRecord() instanceof BeneficiaryHistory);
        BeneficiaryHistory beneficiaryHistory = (BeneficiaryHistory) rifRecordEvent.getRecord();
        assertEquals(RecordAction.INSERT, rifRecordEvent.getRecordAction());
        assertEquals("567834", beneficiaryHistory.getBeneficiaryId());
        assertEquals(LocalDate.of(1980, Month.MARCH, 17), beneficiaryHistory.getBirthDate());
        assertEquals(('1'), beneficiaryHistory.getSex());
        assertEquals("543217066T", beneficiaryHistory.getHicn());
        assertEquals(Optional.of("3456789"), beneficiaryHistory.getMedicareBeneficiaryId());
        /*
         * Check the MBI dates of the record under iteration. These assertions previously
         * re-checked the first record, leaving the dates of these records unverified.
         * NOTE(review): the expected dates are assumed to match the sample file for these
         * records too -- confirm against the fixture.
         */
        assertEquals(LocalDate.of(1990, Month.MARCH, 17), beneficiaryHistory.getMbiEffectiveDate().get());
        assertEquals(LocalDate.of(1995, Month.MARCH, 17), beneficiaryHistory.getMbiObsoleteDate().get());
    }
}
Also used : BeneficiaryHistory(gov.cms.bfd.model.rif.BeneficiaryHistory) RifRecordEvent(gov.cms.bfd.model.rif.RifRecordEvent) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords) RifFilesEvent(gov.cms.bfd.model.rif.RifFilesEvent) Test(org.junit.jupiter.api.Test)

Example 2 with RifFilesEvent

use of gov.cms.bfd.model.rif.RifFilesEvent in project beneficiary-fhir-data by CMSgov.

the class RifFilesProcessorTest method process1MedicareBeneficiaryIdHistoryRecord.

/**
 * Verifies that {@link RifFilesProcessor} parses {@link
 * StaticRifResource#SAMPLE_A_MEDICARE_BENEFICIARY_ID_HISTORY} into the expected number of records,
 * and that the first record is a {@link MedicareBeneficiaryIdHistory} carrying the expected
 * beneficiary ID, MBI effective date, and Medicare beneficiary ID.
 */
@Test
public void process1MedicareBeneficiaryIdHistoryRecord() {
    // Wrap the sample file in an event and run its only file event through the processor.
    RifFilesEvent sampleEvent = new RifFilesEvent(Instant.now(), StaticRifResource.SAMPLE_A_MEDICARE_BENEFICIARY_ID_HISTORY.toRifFile());
    RifFileRecords parsedFile = new RifFilesProcessor().produceRecords(sampleEvent.getFileEvents().get(0));
    List<RifRecordEvent<?>> parsedEvents = parsedFile.getRecords().collect(Collectors.toList());

    // Every record in the sample must have been produced.
    assertEquals(StaticRifResource.SAMPLE_A_MEDICARE_BENEFICIARY_ID_HISTORY.getRecordCount(), parsedEvents.size());

    // The first event must come from the right kind of file and hold the right kind of record.
    RifRecordEvent<?> firstEvent = parsedEvents.get(0);
    assertEquals(StaticRifResource.SAMPLE_A_MEDICARE_BENEFICIARY_ID_HISTORY.getRifFileType(), firstEvent.getFileEvent().getFile().getFileType());
    Object firstRecord = firstEvent.getRecord();
    assertNotNull(firstRecord);
    assertTrue(firstRecord instanceof MedicareBeneficiaryIdHistory);

    // Spot-check the parsed field values.
    MedicareBeneficiaryIdHistory idHistory = (MedicareBeneficiaryIdHistory) firstRecord;
    assertEquals("567834", idHistory.getBeneficiaryId().get());
    assertEquals(LocalDate.of(2011, Month.APRIL, 16), idHistory.getMbiEffectiveDate().get());
    assertEquals("9AB2WW3GR44", idHistory.getMedicareBeneficiaryId().get());
}
Also used : RifRecordEvent(gov.cms.bfd.model.rif.RifRecordEvent) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords) MedicareBeneficiaryIdHistory(gov.cms.bfd.model.rif.MedicareBeneficiaryIdHistory) RifFilesEvent(gov.cms.bfd.model.rif.RifFilesEvent) Test(org.junit.jupiter.api.Test)

Example 3 with RifFilesEvent

use of gov.cms.bfd.model.rif.RifFilesEvent in project beneficiary-fhir-data by CMSgov.

the class RifFilesProcessorTest method process1BeneRecord.

/**
 * Ensures that {@link RifFilesProcessor} can correctly handle {@link
 * StaticRifResource#SAMPLE_A_BENES}.
 *
 * <p>Parses the sample file, checks the record count, and then verifies the first record's file
 * type, record action, and a selection of {@link Beneficiary} field values.
 */
@Test
public void process1BeneRecord() {
    RifFilesEvent filesEvent = new RifFilesEvent(Instant.now(), StaticRifResource.SAMPLE_A_BENES.toRifFile());
    RifFilesProcessor processor = new RifFilesProcessor();
    RifFileRecords rifFileRecords = processor.produceRecords(filesEvent.getFileEvents().get(0));
    List<RifRecordEvent<?>> rifEventsList = rifFileRecords.getRecords().collect(Collectors.toList());
    assertEquals(StaticRifResource.SAMPLE_A_BENES.getRecordCount(), rifEventsList.size());

    RifRecordEvent<?> rifRecordEvent = rifEventsList.get(0);
    assertEquals(StaticRifResource.SAMPLE_A_BENES.getRifFileType(), rifRecordEvent.getFileEvent().getFile().getFileType());
    assertNotNull(rifRecordEvent.getRecord());
    assertTrue(rifRecordEvent.getRecord() instanceof Beneficiary);
    Beneficiary beneRow = (Beneficiary) rifRecordEvent.getRecord();
    assertEquals(beneRow.getBeneficiaryId(), rifRecordEvent.getBeneficiaryId());
    assertEquals(RecordAction.INSERT, rifRecordEvent.getRecordAction());

    // Core demographic and enrollment fields.
    assertEquals("567834", beneRow.getBeneficiaryId());
    assertEquals("MO", beneRow.getStateCode());
    assertEquals("123", beneRow.getCountyCode());
    assertEquals("12345", beneRow.getPostalCode());
    assertEquals(LocalDate.of(1981, Month.MARCH, 17), beneRow.getBirthDate());
    assertEquals(('1'), beneRow.getSex());
    // Character.valueOf replaces the deprecated Character(char) constructor; equality is unchanged.
    assertEquals(Character.valueOf('1'), beneRow.getRace().get());
    assertEquals(Character.valueOf('1'), beneRow.getEntitlementCodeOriginal().get());
    assertEquals(Character.valueOf('1'), beneRow.getEntitlementCodeCurrent().get());
    assertEquals(Character.valueOf('N'), beneRow.getEndStageRenalDiseaseCode().get());
    assertEquals("20", beneRow.getMedicareEnrollmentStatusCode().get());
    /*
     * NOTE(review): this assertion used to appear twice in a row, verbatim. The duplicate was
     * presumably meant to check a different termination code field -- confirm and add that
     * assertion if so.
     */
    assertEquals(Character.valueOf('0'), beneRow.getPartBTerminationCode().get());
    assertEquals("543217066U", beneRow.getHicnUnhashed().orElse(null));
    assertEquals("Doe", beneRow.getNameSurname());
    assertEquals("John", beneRow.getNameGiven());
    assertEquals(Character.valueOf('A'), beneRow.getNameMiddleInitial().get());
    assertEquals("3456789", beneRow.getMedicareBeneficiaryId().get());
    assertEquals(LocalDate.of(1981, Month.MARCH, 17), beneRow.getBeneficiaryDateOfDeath().get());
    assertEquals(LocalDate.of(1963, Month.OCTOBER, 3), beneRow.getMedicareCoverageStartDate().get());
    assertEquals(Character.valueOf('1'), beneRow.getHmoIndicatorAprInd().get());
    assertEquals(new BigDecimal(5), beneRow.getPartDMonthsCount().get());
    assertEquals("00", beneRow.getPartDLowIncomeCostShareGroupFebCode().get());
    assertEquals(Character.valueOf('N'), beneRow.getPartDRetireeDrugSubsidyDecInd().get());

    // Derived mailing address fields.
    assertEquals("204 SOUTH ST", beneRow.getDerivedMailingAddress1().get());
    assertEquals("7560 123TH ST", beneRow.getDerivedMailingAddress2().get());
    assertEquals("SURREY", beneRow.getDerivedMailingAddress3().get());
    assertEquals("DAEJEON SI 34867", beneRow.getDerivedMailingAddress4().get());
    assertEquals("COLOMBIA", beneRow.getDerivedMailingAddress5().get());
    assertEquals("SURREY", beneRow.getDerivedMailingAddress6().get());
    assertEquals("PODUNK", beneRow.getDerivedCityName().get());
    assertEquals("IA", beneRow.getDerivedStateCode().get());
    assertEquals("123456789", beneRow.getDerivedZipCode().get());

    assertEquals(LocalDate.of(2020, Month.JULY, 30), beneRow.getMbiEffectiveDate().get());
    assertEquals(new BigDecimal("1"), beneRow.getBeneLinkKey().get());
}
Also used : RifRecordEvent(gov.cms.bfd.model.rif.RifRecordEvent) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords) RifFilesEvent(gov.cms.bfd.model.rif.RifFilesEvent) BigDecimal(java.math.BigDecimal) Beneficiary(gov.cms.bfd.model.rif.Beneficiary) Test(org.junit.jupiter.api.Test)

Example 4 with RifFilesEvent

use of gov.cms.bfd.model.rif.RifFilesEvent in project beneficiary-fhir-data by CMSgov.

the class CcwRifLoadJob method call.

/**
 * Runs one pass of the load job: refreshes the queue of pending data sets and, if one is pending,
 * waits until it is fully available, hands it to the listener for processing, and then marks it
 * as processed and schedules its S3 objects to be moved.
 *
 * @return {@link PipelineJobOutcome#NOTHING_TO_DO} if no data set is pending, or {@link
 *     PipelineJobOutcome#WORK_DONE} after a data set has been processed
 * @throws RuntimeException if the thread is interrupted while waiting for a data set to become
 *     available (the thread's interrupt flag is restored first)
 * @throws UncheckedIOException if the free space of the temp directory cannot be determined
 * @see gov.cms.bfd.pipeline.sharedutils.PipelineJob#call()
 */
@Override
public PipelineJobOutcome call() throws Exception {
    LOGGER.debug("Scanning for data sets to process...");
    // Update the queue from S3.
    dataSetQueue.updatePendingDataSets();
    // If no manifest was found, we're done (until next time).
    if (dataSetQueue.isEmpty()) {
        LOGGER.debug(LOG_MESSAGE_NO_DATA_SETS);
        listener.noDataAvailable();
        return PipelineJobOutcome.NOTHING_TO_DO;
    }
    // We've found the oldest manifest.
    DataSetManifest manifestToProcess = dataSetQueue.getNextDataSetToProcess().get();
    LOGGER.info("Found data set to process: '{}'." + " There were '{}' total pending data sets and '{}' completed ones.", manifestToProcess.toString(), dataSetQueue.getPendingManifestsCount(), dataSetQueue.getCompletedManifestsCount().get());
    /*
     * We've got a data set to process. However, it might still be uploading
     * to S3, so we need to wait for that to complete before we start
     * processing it.
     */
    boolean alreadyLoggedWaitingEvent = false;
    while (!dataSetIsAvailable(manifestToProcess)) {
        /*
         * We're very patient here, so we keep looping, but it's prudent to
         * pause between each iteration. TODO should eventually time out,
         * once we know how long transfers might take
         */
        if (!alreadyLoggedWaitingEvent) {
            LOGGER.info("Data set not ready. Waiting for it to finish uploading...");
            alreadyLoggedWaitingEvent = true;
        }
        try {
            Thread.sleep(1000 * 1);
        } catch (InterruptedException e) {
            /*
             * Many Java applications use InterruptedExceptions to signal
             * that a thread should stop what it's doing ASAP. This app
             * doesn't, so this is unexpected, and accordingly, we don't
             * know what to do. Safest bet is to blow up -- but restore the
             * interrupt flag first, so that code further up the stack can
             * still observe that an interrupt was requested.
             */
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }
    /*
     * Huzzah! We've got a data set to process and we've verified it's all there
     * waiting for us in S3. Now convert it into a RifFilesEvent (containing a List
     * of asynchronously-downloading S3RifFiles).
     */
    LOGGER.info(LOG_MESSAGE_DATA_SET_READY);
    List<S3RifFile> rifFiles = manifestToProcess.getEntries().stream().map(manifestEntry -> new S3RifFile(appMetrics, manifestEntry, s3TaskManager.downloadAsync(manifestEntry))).collect(Collectors.toList());
    RifFilesEvent rifFilesEvent = new RifFilesEvent(manifestToProcess.getTimestamp(), new ArrayList<>(rifFiles));
    /*
     * To save time for the next data set, peek ahead at it. If it's available and
     * it looks like there's enough disk space, start downloading it early in the
     * background.
     */
    Optional<DataSetManifest> secondManifestToProcess = dataSetQueue.getSecondDataSetToProcess();
    if (secondManifestToProcess.isPresent() && dataSetIsAvailable(secondManifestToProcess.get())) {
        Path tmpdir = Paths.get(System.getProperty("java.io.tmpdir"));
        long usableFreeTempSpace;
        try {
            usableFreeTempSpace = Files.getFileStore(tmpdir).getUsableSpace();
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
        if (usableFreeTempSpace >= (50 * GIGA)) {
            secondManifestToProcess.get().getEntries().forEach(manifestEntry -> s3TaskManager.downloadAsync(manifestEntry));
        }
    }
    /*
     * Now we hand that off to the DataSetMonitorListener, to do the *real*
     * work of actually processing that data set. It's important that we
     * block until it's completed, in order to ensure that we don't end up
     * processing multiple data sets in parallel (which would lead to data
     * consistency problems).
     */
    listener.dataAvailable(rifFilesEvent);
    LOGGER.info(LOG_MESSAGE_DATA_SET_COMPLETE);
    /*
     * Now that the data set has been processed, we need to ensure that we
     * don't end up processing it again. We ensure this two ways: 1) we keep
     * a list of the data sets most recently processed, and 2) we rename the
     * S3 objects that comprise that data set. (#1 is required as S3
     * deletes/moves are only *eventually* consistent, so #2 may not take
     * effect right away.)
     */
    rifFiles.forEach(f -> f.cleanupTempFile());
    dataSetQueue.markProcessed(manifestToProcess);
    s3TaskManager.submit(new DataSetMoveTask(s3TaskManager, options, manifestToProcess));
    return PipelineJobOutcome.WORK_DONE;
}
Also used : S3RifFile(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3RifFile) NullPipelineJobArguments(gov.cms.bfd.pipeline.sharedutils.NullPipelineJobArguments) LoggerFactory(org.slf4j.LoggerFactory) PipelineJob(gov.cms.bfd.pipeline.sharedutils.PipelineJob) ExtractionOptions(gov.cms.bfd.pipeline.ccw.rif.extract.ExtractionOptions) DataSetMonitorListener(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetMonitorListener) ListObjectsV2Result(com.amazonaws.services.s3.model.ListObjectsV2Result) ArrayList(java.util.ArrayList) DataSetManifestEntry(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry) S3TaskManager(gov.cms.bfd.pipeline.ccw.rif.extract.s3.task.S3TaskManager) HashSet(java.util.HashSet) ListObjectsV2Request(com.amazonaws.services.s3.model.ListObjectsV2Request) PipelineJobSchedule(gov.cms.bfd.pipeline.sharedutils.PipelineJobSchedule) DataSetMoveTask(gov.cms.bfd.pipeline.ccw.rif.extract.s3.task.DataSetMoveTask) Path(java.nio.file.Path) PipelineJobOutcome(gov.cms.bfd.pipeline.sharedutils.PipelineJobOutcome) MetricRegistry(com.codahale.metrics.MetricRegistry) Logger(org.slf4j.Logger) Files(java.nio.file.Files) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) DataSetManifest(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest) RifFilesEvent(gov.cms.bfd.model.rif.RifFilesEvent) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) ChronoUnit(java.time.temporal.ChronoUnit) Paths(java.nio.file.Paths) DataSetManifestId(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestId) Optional(java.util.Optional) DataSetQueue(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetQueue) Pattern(java.util.regex.Pattern) Path(java.nio.file.Path) DataSetManifest(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) 
DataSetMoveTask(gov.cms.bfd.pipeline.ccw.rif.extract.s3.task.DataSetMoveTask) S3RifFile(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3RifFile) RifFilesEvent(gov.cms.bfd.model.rif.RifFilesEvent)

Example 5 with RifFilesEvent

use of gov.cms.bfd.model.rif.RifFilesEvent in project beneficiary-fhir-data by CMSgov.

the class DefaultDataSetMonitorListener method dataAvailable.

/**
 * Processes every file event in the given data set: for each one, produces its records with the
 * RIF processor and passes them to the RIF loader, while a per-file metrics reporter logs that
 * file's metrics. The whole data set is timed.
 *
 * @param rifFilesEvent the data set whose file events are to be processed, in order
 * @see
 *     gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetMonitorListener#dataAvailable(gov.cms.bfd.model.rif.RifFilesEvent)
 */
@Override
public void dataAvailable(RifFilesEvent rifFilesEvent) {
    Timer.Context timerDataSet = appMetrics.timer(MetricRegistry.name(PipelineApplication.class.getSimpleName(), "dataSet", "processed")).time();
    Consumer<Throwable> errorHandler = error -> {
        /*
         * This will be called on the same thread used to run each
         * RifLoader task (probably a background one). This is not
         * the right place to do any error _recovery_ (that'd have
         * to be inside RifLoader itself), but it is likely the
         * right place to decide when/if a failure is "bad enough"
         * that the rest of processing should be stopped. Right now
         * we stop that way for _any_ failure, but we probably want
         * to be more discriminating than that.
         */
        errorOccurred(error);
    };
    Consumer<RifRecordLoadResult> resultHandler = result -> {
        /*
         * Don't really *need* to do anything here. The RifLoader
         * already records metrics for each data set.
         */
    };
    /*
     * Each ETL stage produces a stream that will be handed off to
     * and processed by the next stage.
     */
    for (RifFileEvent rifFileEvent : rifFilesEvent.getFileEvents()) {
        Slf4jReporter dataSetFileMetricsReporter = Slf4jReporter.forRegistry(rifFileEvent.getEventMetrics()).outputTo(LOGGER).build();
        dataSetFileMetricsReporter.start(2, TimeUnit.MINUTES);
        try {
            RifFileRecords rifFileRecords = rifProcessor.produceRecords(rifFileEvent);
            rifLoader.process(rifFileRecords, errorHandler, resultHandler);
        } finally {
            // Always stop the periodic reporter, so a failure while processing this file
            // doesn't leave its scheduled reporting running.
            dataSetFileMetricsReporter.stop();
        }
        // Emit one final report for this file now that it has been fully processed.
        dataSetFileMetricsReporter.report();
    }
    timerDataSet.stop();
}
Also used : RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) MetricRegistry(com.codahale.metrics.MetricRegistry) Logger(org.slf4j.Logger) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords) LoggerFactory(org.slf4j.LoggerFactory) RifRecordLoadResult(gov.cms.bfd.pipeline.ccw.rif.load.RifRecordLoadResult) CcwRifLoadJob(gov.cms.bfd.pipeline.ccw.rif.CcwRifLoadJob) DataSetMonitorListener(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetMonitorListener) RifFilesEvent(gov.cms.bfd.model.rif.RifFilesEvent) RifFilesProcessor(gov.cms.bfd.pipeline.ccw.rif.extract.RifFilesProcessor) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) Slf4jReporter(com.codahale.metrics.Slf4jReporter) Timer(com.codahale.metrics.Timer) RifLoader(gov.cms.bfd.pipeline.ccw.rif.load.RifLoader) Timer(com.codahale.metrics.Timer) RifRecordLoadResult(gov.cms.bfd.pipeline.ccw.rif.load.RifRecordLoadResult) Slf4jReporter(com.codahale.metrics.Slf4jReporter) RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords)

Aggregations

RifFilesEvent (gov.cms.bfd.model.rif.RifFilesEvent)21 RifFileRecords (gov.cms.bfd.model.rif.RifFileRecords)18 RifRecordEvent (gov.cms.bfd.model.rif.RifRecordEvent)13 Test (org.junit.jupiter.api.Test)13 BigDecimal (java.math.BigDecimal)9 RifFileEvent (gov.cms.bfd.model.rif.RifFileEvent)6 RifFilesProcessor (gov.cms.bfd.pipeline.ccw.rif.extract.RifFilesProcessor)6 Beneficiary (gov.cms.bfd.model.rif.Beneficiary)4 RifLoader (gov.cms.bfd.pipeline.ccw.rif.load.RifLoader)4 IOException (java.io.IOException)4 UncheckedIOException (java.io.UncheckedIOException)4 Path (java.nio.file.Path)4 Logger (org.slf4j.Logger)4 LoggerFactory (org.slf4j.LoggerFactory)4 StaticRifResource (gov.cms.bfd.model.rif.samples.StaticRifResource)3 LoadAppOptions (gov.cms.bfd.pipeline.ccw.rif.load.LoadAppOptions)3 Files (java.nio.file.Files)3 List (java.util.List)3 MetricRegistry (com.codahale.metrics.MetricRegistry)2 Slf4jReporter (com.codahale.metrics.Slf4jReporter)2