Search in sources :

Example 1 with RifFileEvent

use of gov.cms.bfd.model.rif.RifFileEvent in project beneficiary-fhir-data by CMSgov.

the class RifLoaderIT method loadSample.

/**
 * Pushes every file of the given {@link RifFilesEvent} through a freshly created {@link
 * RifFilesProcessor} and {@link RifLoader}, logging the per-file and application metrics along the
 * way, and asserts that the error callback was never invoked.
 *
 * @param sampleName a human-friendly label that is written to the log to identify this data load
 * @param options the {@link LoadAppOptions} handed to the {@link RifLoader}
 * @param rifFilesEvent the {@link RifFilesEvent} whose file events are loaded
 * @return the number of times the success callback was invoked by the {@link RifLoader}
 */
private int loadSample(String sampleName, LoadAppOptions options, RifFilesEvent rifFilesEvent) {
    LOGGER.info("Loading RIF files: '{}'...", sampleName);

    // One producer (parse stage) and one loader (load stage) serve all files.
    RifFilesProcessor recordProducer = new RifFilesProcessor();
    RifLoader rifLoader = new RifLoader(options, PipelineTestUtils.get().getPipelineApplicationState());

    LOGGER.info("Loading RIF records...");
    AtomicInteger succeeded = new AtomicInteger();
    AtomicInteger failed = new AtomicInteger();

    for (RifFileEvent fileEvent : rifFilesEvent.getFileEvents()) {
        // Parse the file and hand its records straight to the loader.
        rifLoader.process(recordProducer.produceRecords(fileEvent), error -> {
            failed.incrementAndGet();
            LOGGER.warn("Record(s) failed to load.", error);
        }, result -> {
            succeeded.incrementAndGet();
        });

        // Dump this file's metrics to the log once it has been processed.
        Slf4jReporter fileMetricsReporter = Slf4jReporter.forRegistry(fileEvent.getEventMetrics()).outputTo(LOGGER).build();
        fileMetricsReporter.report();
    }

    LOGGER.info("Loaded RIF files: '{}', record count: '{}'.", sampleName, succeeded.get());

    // Dump the application-wide metrics as well.
    Slf4jReporter appMetricsReporter = Slf4jReporter.forRegistry(PipelineTestUtils.get().getPipelineApplicationState().getMetrics()).outputTo(LOGGER).build();
    appMetricsReporter.report();

    // Any invocation of the error callback fails the calling test.
    assertEquals(0, failed.get(), "Load errors encountered.");
    return succeeded.get();
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords) RifFilesProcessor(gov.cms.bfd.pipeline.ccw.rif.extract.RifFilesProcessor)

Example 2 with RifFileEvent

use of gov.cms.bfd.model.rif.RifFileEvent in project beneficiary-fhir-data by CMSgov.

the class RifFilesProcessor method produceRecords.

/**
 * Builds the stream of {@link RifRecordEvent}s for a single RIF file.
 *
 * <p>The file's {@link RifFileType} selects both the record parser and whether consecutive CSV
 * rows are grouped by the type's ID column. The type is validated <em>before</em> the CSV parser
 * is opened, so an unsupported file never leaves an open parser (and its underlying reader/stream)
 * behind. Closing the returned stream closes the CSV parser.
 *
 * @param rifFileEvent the {@link RifFileEvent} that is being processed
 * @return a {@link RifFileRecords} with the {@link RifRecordEvent}s produced from the specified
 *     {@link RifFileEvent}
 * @throws UnsupportedRifFileTypeException if no record parser exists for the file's type
 */
public RifFileRecords produceRecords(RifFileEvent rifFileEvent) {
    RifFile file = rifFileEvent.getFile();
    RifFileType fileType = file.getFileType();

    /* Pick the parser for this file type and decide whether rows are grouped. */
    boolean isGrouped;
    BiFunction<RifFileEvent, List<CSVRecord>, RifRecordEvent<?>> recordParser;
    if (fileType == RifFileType.BENEFICIARY) {
        isGrouped = false;
        recordParser = RifFilesProcessor::buildBeneficiaryEvent;
    } else if (fileType == RifFileType.BENEFICIARY_HISTORY) {
        isGrouped = false;
        recordParser = RifFilesProcessor::buildBeneficiaryHistoryEvent;
    } else if (fileType == RifFileType.MEDICARE_BENEFICIARY_ID_HISTORY) {
        isGrouped = false;
        recordParser = RifFilesProcessor::buildMedicareBeneficiaryIdHistoryEvent;
    } else if (fileType == RifFileType.PDE) {
        isGrouped = false;
        recordParser = RifFilesProcessor::buildPartDEvent;
    } else if (fileType == RifFileType.CARRIER) {
        isGrouped = true;
        recordParser = RifFilesProcessor::buildCarrierClaimEvent;
    } else if (fileType == RifFileType.INPATIENT) {
        isGrouped = true;
        recordParser = RifFilesProcessor::buildInpatientClaimEvent;
    } else if (fileType == RifFileType.OUTPATIENT) {
        isGrouped = true;
        recordParser = RifFilesProcessor::buildOutpatientClaimEvent;
    } else if (fileType == RifFileType.SNF) {
        isGrouped = true;
        recordParser = RifFilesProcessor::buildSNFClaimEvent;
    } else if (fileType == RifFileType.HOSPICE) {
        isGrouped = true;
        recordParser = RifFilesProcessor::buildHospiceClaimEvent;
    } else if (fileType == RifFileType.HHA) {
        isGrouped = true;
        recordParser = RifFilesProcessor::buildHHAClaimEvent;
    } else if (fileType == RifFileType.DME) {
        isGrouped = true;
        recordParser = RifFilesProcessor::buildDMEClaimEvent;
    } else {
        throw new UnsupportedRifFileTypeException("Unsupported file type:" + fileType);
    }

    /*
     * Only open the parser once the file type is known to be supported; opening it earlier
     * would leak it when the exception above is thrown.
     *
     * Approach used here to parse CSV as a Java 8 Stream is courtesy of
     * https://rumianom.pl/rumianom/entry/apache-commons-csv-with-java.
     */
    CSVParser parser = RifParsingUtils.createCsvParser(file);

    /*
     * Use the CSVParser to drive a Stream of grouped CSVRecords
     * (specifically, group by claim ID/lines).
     */
    CsvRecordGrouper grouper = new ColumnValueCsvRecordGrouper(isGrouped ? fileType.getIdColumn() : null);
    Iterator<List<CSVRecord>> csvIterator = new CsvRecordGroupingIterator(parser, grouper);
    Spliterator<List<CSVRecord>> spliterator = Spliterators.spliteratorUnknownSize(csvIterator, Spliterator.ORDERED | Spliterator.NONNULL);
    Stream<List<CSVRecord>> csvRecordStream = StreamSupport.stream(spliterator, false).onClose(() -> {
        try {
            /*
             * This will also close the Reader and InputStream that the
             * CSVParser was consuming.
             */
            parser.close();
        } catch (IOException e) {
            LOGGER.warn("Unable to close CSVParser", e);
        }
    });

    /* Map each record group to a single RifRecordEvent, timing each successful parse. */
    Stream<RifRecordEvent<?>> rifRecordStream = csvRecordStream.map(csvRecordGroup -> {
        try {
            Timer.Context parsingTimer = rifFileEvent.getEventMetrics().timer(MetricRegistry.name(getClass().getSimpleName(), "recordParsing")).time();
            RifRecordEvent<?> recordEvent = recordParser.apply(rifFileEvent, csvRecordGroup);
            parsingTimer.close();
            return recordEvent;
        } catch (InvalidRifValueException e) {
            /* Log where in the file the bad value was found, then propagate the failure. */
            LOGGER.warn("Parse error encountered near line number '{}'.", csvRecordGroup.get(0).getRecordNumber());
            throw new InvalidRifValueException(e);
        }
    });
    return new RifFileRecords(rifFileEvent, rifRecordStream);
}
Also used : CsvRecordGrouper(gov.cms.bfd.pipeline.ccw.rif.extract.CsvRecordGroupingIterator.CsvRecordGrouper) ColumnValueCsvRecordGrouper(gov.cms.bfd.pipeline.ccw.rif.extract.CsvRecordGroupingIterator.ColumnValueCsvRecordGrouper) RifFile(gov.cms.bfd.model.rif.RifFile) RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) RifRecordEvent(gov.cms.bfd.model.rif.RifRecordEvent) ColumnValueCsvRecordGrouper(gov.cms.bfd.pipeline.ccw.rif.extract.CsvRecordGroupingIterator.ColumnValueCsvRecordGrouper) IOException(java.io.IOException) Timer(com.codahale.metrics.Timer) InvalidRifValueException(gov.cms.bfd.model.rif.parse.InvalidRifValueException) CSVParser(org.apache.commons.csv.CSVParser) List(java.util.List) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords) UnsupportedRifFileTypeException(gov.cms.bfd.pipeline.ccw.rif.extract.exceptions.UnsupportedRifFileTypeException)

Example 3 with RifFileEvent

use of gov.cms.bfd.model.rif.RifFileEvent in project beneficiary-fhir-data by CMSgov.

the class DefaultDataSetMonitorListener method dataAvailable.

/**
 * Processes every file of the newly available data set: each file is parsed by the RIF processor
 * and the resulting records are handed to the RIF loader. While a file is being processed its
 * metrics are logged every two minutes, and once more after it has finished.
 *
 * <p>The periodic metrics reporter is stopped even when parsing or loading throws, so a failed
 * file does not leave a scheduled reporter running.
 *
 * @param rifFilesEvent the {@link RifFilesEvent} describing the data set to process
 * @see
 *     gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetMonitorListener#dataAvailable(gov.cms.bfd.model.rif.RifFilesEvent)
 */
@Override
public void dataAvailable(RifFilesEvent rifFilesEvent) {
    Timer.Context timerDataSet = appMetrics.timer(MetricRegistry.name(PipelineApplication.class.getSimpleName(), "dataSet", "processed")).time();
    Consumer<Throwable> errorHandler = error -> {
        /*
         * This will be called on the same thread used to run each
         * RifLoader task (probably a background one). This is not
         * the right place to do any error _recovery_ (that'd have
         * to be inside RifLoader itself), but it is likely the
         * right place to decide when/if a failure is "bad enough"
         * that the rest of processing should be stopped. Right now
         * we stop that way for _any_ failure, but we probably want
         * to be more discriminating than that.
         */
        errorOccurred(error);
    };
    Consumer<RifRecordLoadResult> resultHandler = result -> {
        /*
         * Don't really *need* to do anything here. The RifLoader
         * already records metrics for each data set.
         */
    };
    /*
     * Each ETL stage produces a stream that will be handed off to
     * and processed by the next stage.
     */
    for (RifFileEvent rifFileEvent : rifFilesEvent.getFileEvents()) {
        Slf4jReporter dataSetFileMetricsReporter = Slf4jReporter.forRegistry(rifFileEvent.getEventMetrics()).outputTo(LOGGER).build();
        dataSetFileMetricsReporter.start(2, TimeUnit.MINUTES);
        try {
            RifFileRecords rifFileRecords = rifProcessor.produceRecords(rifFileEvent);
            rifLoader.process(rifFileRecords, errorHandler, resultHandler);
        } finally {
            /* Always cancel the periodic reporting, even if this file failed. */
            dataSetFileMetricsReporter.stop();
        }
        /* Emit one final report for a file that was processed without an exception. */
        dataSetFileMetricsReporter.report();
    }
    /* Only reached (and so only recorded) when every file was processed without an exception. */
    timerDataSet.stop();
}
Also used : RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) MetricRegistry(com.codahale.metrics.MetricRegistry) Logger(org.slf4j.Logger) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords) LoggerFactory(org.slf4j.LoggerFactory) RifRecordLoadResult(gov.cms.bfd.pipeline.ccw.rif.load.RifRecordLoadResult) CcwRifLoadJob(gov.cms.bfd.pipeline.ccw.rif.CcwRifLoadJob) DataSetMonitorListener(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetMonitorListener) RifFilesEvent(gov.cms.bfd.model.rif.RifFilesEvent) RifFilesProcessor(gov.cms.bfd.pipeline.ccw.rif.extract.RifFilesProcessor) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) Slf4jReporter(com.codahale.metrics.Slf4jReporter) Timer(com.codahale.metrics.Timer) RifLoader(gov.cms.bfd.pipeline.ccw.rif.load.RifLoader) Timer(com.codahale.metrics.Timer) RifRecordLoadResult(gov.cms.bfd.pipeline.ccw.rif.load.RifRecordLoadResult) Slf4jReporter(com.codahale.metrics.Slf4jReporter) RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords)

Example 4 with RifFileEvent

use of gov.cms.bfd.model.rif.RifFileEvent in project beneficiary-fhir-data by CMSgov.

the class LoadedFilterManagerIT method loadData.

/**
 * Wraps the given sample resources in a single {@link RifFilesEvent} and feeds each of its files
 * through a new {@link RifFilesProcessor} and {@link RifLoader}. Load errors and load results
 * reported by the loader are both ignored.
 *
 * @param dataSource not referenced by this method
 * @param sampleResources the sample RIF resources to load
 */
private static void loadData(DataSource dataSource, List<StaticRifResource> sampleResources) {
    LoadAppOptions options = CcwRifLoadTestUtils.getLoadOptions();

    // A single event, timestamped "now", carries every sample file.
    RifFilesEvent filesEvent = new RifFilesEvent(
            Instant.now(),
            sampleResources.stream().map(resource -> resource.toRifFile()).collect(Collectors.toList()));

    // Parse stage and load stage of the pipeline.
    RifFilesProcessor recordProducer = new RifFilesProcessor();
    RifLoader rifLoader = new RifLoader(options, PipelineTestUtils.get().getPipelineApplicationState());

    // Run each file through both stages; the callbacks are intentionally no-ops.
    for (RifFileEvent fileEvent : filesEvent.getFileEvents()) {
        rifLoader.process(recordProducer.produceRecords(fileEvent), error -> {
        }, result -> {
        });
    }
}
Also used : LoadAppOptions(gov.cms.bfd.pipeline.ccw.rif.load.LoadAppOptions) StaticRifResource(gov.cms.bfd.model.rif.samples.StaticRifResource) RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords) RifFilesEvent(gov.cms.bfd.model.rif.RifFilesEvent) RifFilesProcessor(gov.cms.bfd.pipeline.ccw.rif.extract.RifFilesProcessor) RifLoader(gov.cms.bfd.pipeline.ccw.rif.load.RifLoader)

Example 5 with RifFileEvent

use of gov.cms.bfd.model.rif.RifFileEvent in project beneficiary-fhir-data by CMSgov.

the class ServerTestUtils method loadData.

/**
 * Loads the given sample RIF resources through a new {@link RifFilesProcessor} and {@link
 * RifLoader}, collecting the record of every result that the loader reports. Load errors are
 * logged as warnings and otherwise ignored.
 *
 * <p>Results are gathered in a synchronized list because the loader's callbacks may be invoked
 * from a thread other than the caller's (NOTE(review): confirm against {@link RifLoader}); a plain
 * {@link ArrayList} could lose or corrupt entries under concurrent appends.
 *
 * @param sampleResources the sample RIF resources to load
 * @return the {@link List} of RIF records that were loaded (e.g. {@link Beneficiary}s, etc.)
 */
public List<Object> loadData(List<StaticRifResource> sampleResources) {
    LoadAppOptions loadOptions = CcwRifLoadTestUtils.getLoadOptions();
    RifFilesEvent rifFilesEvent = new RifFilesEvent(Instant.now(), sampleResources.stream().map(StaticRifResource::toRifFile).collect(Collectors.toList()));
    // Create the processors that will handle each stage of the pipeline.
    RifFilesProcessor processor = new RifFilesProcessor();
    RifLoader loader = new RifLoader(loadOptions, PipelineTestUtils.get().getPipelineApplicationState());
    // Link up the pipeline and run it.
    LOGGER.info("Loading RIF records...");
    // Fully qualified so that no additional import is needed in this file.
    List<Object> recordsLoaded = java.util.Collections.synchronizedList(new ArrayList<>());
    for (RifFileEvent rifFileEvent : rifFilesEvent.getFileEvents()) {
        RifFileRecords rifFileRecords = processor.produceRecords(rifFileEvent);
        loader.process(rifFileRecords, error -> {
            LOGGER.warn("Record(s) failed to load.", error);
        }, result -> {
            recordsLoaded.add(result.getRifRecordEvent().getRecord());
        });
    }
    LOGGER.info("Loaded RIF records: '{}'.", recordsLoaded.size());
    // Hand callers a plain, unsynchronized ArrayList, as before.
    return new ArrayList<>(recordsLoaded);
}
Also used : LoadAppOptions(gov.cms.bfd.pipeline.ccw.rif.load.LoadAppOptions) ArrayList(java.util.ArrayList) RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords) RifFilesEvent(gov.cms.bfd.model.rif.RifFilesEvent) RifFilesProcessor(gov.cms.bfd.pipeline.ccw.rif.extract.RifFilesProcessor) RifLoader(gov.cms.bfd.pipeline.ccw.rif.load.RifLoader)

Aggregations

RifFileEvent (gov.cms.bfd.model.rif.RifFileEvent)8 RifFileRecords (gov.cms.bfd.model.rif.RifFileRecords)7 RifFilesProcessor (gov.cms.bfd.pipeline.ccw.rif.extract.RifFilesProcessor)6 RifFilesEvent (gov.cms.bfd.model.rif.RifFilesEvent)5 RifLoader (gov.cms.bfd.pipeline.ccw.rif.load.RifLoader)4 Timer (com.codahale.metrics.Timer)3 LoadAppOptions (gov.cms.bfd.pipeline.ccw.rif.load.LoadAppOptions)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 List (java.util.List)3 MetricRegistry (com.codahale.metrics.MetricRegistry)2 Beneficiary (gov.cms.bfd.model.rif.Beneficiary)2 StaticRifResource (gov.cms.bfd.model.rif.samples.StaticRifResource)2 UncheckedIOException (java.io.UncheckedIOException)2 Path (java.nio.file.Path)2 EntityManager (javax.persistence.EntityManager)2 EntityTransaction (javax.persistence.EntityTransaction)2 CSVPrinter (org.apache.commons.csv.CSVPrinter)2 FhirContext (ca.uhn.fhir.context.FhirContext)1 IGenericClient (ca.uhn.fhir.rest.client.api.IGenericClient)1