Search in sources :

Example 1 with RifLoader

use of gov.cms.bfd.pipeline.ccw.rif.load.RifLoader in project beneficiary-fhir-data by CMSgov.

In the class DefaultDataSetMonitorListener, the method dataAvailable.

/**
 * Processes every RIF file of the given data set, one file at a time: {@code rifProcessor}
 * produces the records for a file and {@code rifLoader} loads them.
 *
 * <p>While a file is being processed, a per-file {@link Slf4jReporter} logs that file's event
 * metrics every two minutes; it is always stopped (and asked for one final report) when the
 * file is done, whether processing succeeded or threw. The data set as a whole is timed under
 * the {@code dataSet.processed} timer, which is only stopped when every file completed without
 * an exception escaping this method.
 *
 * <p>Load failures reported by the loader are forwarded to {@code errorOccurred}; successful
 * load results are ignored here.
 *
 * @param rifFilesEvent the data set whose file events should be processed
 * @see
 *     gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetMonitorListener#dataAvailable(gov.cms.bfd.model.rif.RifFilesEvent)
 */
@Override
public void dataAvailable(RifFilesEvent rifFilesEvent) {
    Timer.Context timerDataSet = appMetrics.timer(MetricRegistry.name(PipelineApplication.class.getSimpleName(), "dataSet", "processed")).time();
    Consumer<Throwable> errorHandler = error -> {
        /*
         * This will be called on the same thread used to run each
         * RifLoader task (probably a background one). This is not
         * the right place to do any error _recovery_ (that'd have
         * to be inside RifLoader itself), but it is likely the
         * right place to decide when/if a failure is "bad enough"
         * that the rest of processing should be stopped. Right now
         * we stop that way for _any_ failure, but we probably want
         * to be more discriminating than that.
         */
        errorOccurred(error);
    };
    Consumer<RifRecordLoadResult> resultHandler = result -> {
        /*
         * Don't really *need* to do anything here. The RifLoader
         * already records metrics for each data set.
         */
    };
    /*
     * Each ETL stage produces a stream that will be handed off to
     * and processed by the next stage.
     */
    for (RifFileEvent rifFileEvent : rifFilesEvent.getFileEvents()) {
        Slf4jReporter dataSetFileMetricsReporter = Slf4jReporter.forRegistry(rifFileEvent.getEventMetrics()).outputTo(LOGGER).build();
        dataSetFileMetricsReporter.start(2, TimeUnit.MINUTES);
        try {
            RifFileRecords rifFileRecords = rifProcessor.produceRecords(rifFileEvent);
            rifLoader.process(rifFileRecords, errorHandler, resultHandler);
        } finally {
            /*
             * Stop the periodic reporter even when processing throws;
             * otherwise its scheduler would keep logging this file's
             * metrics after the file has been abandoned. The final
             * report captures whatever was measured up to this point.
             */
            dataSetFileMetricsReporter.stop();
            dataSetFileMetricsReporter.report();
        }
    }
    // Only reached when no exception escaped the loop, so the timer counts completed data sets.
    timerDataSet.stop();
}
Also used : RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) MetricRegistry(com.codahale.metrics.MetricRegistry) Logger(org.slf4j.Logger) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords) LoggerFactory(org.slf4j.LoggerFactory) RifRecordLoadResult(gov.cms.bfd.pipeline.ccw.rif.load.RifRecordLoadResult) CcwRifLoadJob(gov.cms.bfd.pipeline.ccw.rif.CcwRifLoadJob) DataSetMonitorListener(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetMonitorListener) RifFilesEvent(gov.cms.bfd.model.rif.RifFilesEvent) RifFilesProcessor(gov.cms.bfd.pipeline.ccw.rif.extract.RifFilesProcessor) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) Slf4jReporter(com.codahale.metrics.Slf4jReporter) Timer(com.codahale.metrics.Timer) RifLoader(gov.cms.bfd.pipeline.ccw.rif.load.RifLoader) Timer(com.codahale.metrics.Timer) RifRecordLoadResult(gov.cms.bfd.pipeline.ccw.rif.load.RifRecordLoadResult) Slf4jReporter(com.codahale.metrics.Slf4jReporter) RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords)

Example 2 with RifLoader

use of gov.cms.bfd.pipeline.ccw.rif.load.RifLoader in project beneficiary-fhir-data by CMSgov.

In the class PipelineApplication, the method createCcwRifLoadJob.

/**
 * Builds the CCW RIF load job by creating the service for each stage of the extract, transform,
 * and load process and wiring them together through a {@link DataSetMonitorListener}.
 *
 * @param loadOptions the {@link CcwRifLoadOptions} to use
 * @param appState the {@link PipelineApplicationState} to use
 * @return a {@link CcwRifLoadJob} instance for the application to use
 */
private static PipelineJob<?> createCcwRifLoadJob(CcwRifLoadOptions loadOptions, PipelineApplicationState appState) {
    // The services that handle the individual extract, transform, and load stages.
    S3TaskManager taskManager = new S3TaskManager(appState.getMetrics(), loadOptions.getExtractionOptions());
    RifFilesProcessor filesProcessor = new RifFilesProcessor();
    RifLoader loader = new RifLoader(loadOptions.getLoadOptions(), appState);

    // The listener glues those stages together and runs them for each data set that is found.
    DataSetMonitorListener listener = new DefaultDataSetMonitorListener(
            appState.getMetrics(),
            PipelineApplication::handleUncaughtException,
            filesProcessor,
            loader);

    return new CcwRifLoadJob(
            appState.getMetrics(),
            loadOptions.getExtractionOptions(),
            taskManager,
            listener);
}
Also used : S3TaskManager(gov.cms.bfd.pipeline.ccw.rif.extract.s3.task.S3TaskManager) DataSetMonitorListener(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetMonitorListener) CcwRifLoadJob(gov.cms.bfd.pipeline.ccw.rif.CcwRifLoadJob) RifFilesProcessor(gov.cms.bfd.pipeline.ccw.rif.extract.RifFilesProcessor) RifLoader(gov.cms.bfd.pipeline.ccw.rif.load.RifLoader)

Example 3 with RifLoader

use of gov.cms.bfd.pipeline.ccw.rif.load.RifLoader in project beneficiary-fhir-data by CMSgov.

In the class LoadedFilterManagerIT, the method loadData.

/**
 * Loads the given sample RIF resources by converting them to RIF files, wrapping them in a
 * single {@link RifFilesEvent}, and pushing each of its file events through a {@link
 * RifFilesProcessor} and a {@link RifLoader}.
 *
 * <p>Both callbacks handed to the loader are no-ops: reported errors and load results are
 * ignored by this method.
 *
 * @param dataSource not referenced by this method's body
 * @param sampleResources the sample RIF resources to load
 */
private static void loadData(DataSource dataSource, List<StaticRifResource> sampleResources) {
    LoadAppOptions options = CcwRifLoadTestUtils.getLoadOptions();
    Instant eventTimestamp = Instant.now();
    RifFilesEvent filesEvent = new RifFilesEvent(eventTimestamp, sampleResources.stream().map(resource -> resource.toRifFile()).collect(Collectors.toList()));

    // One handler per pipeline stage: record extraction, then loading.
    RifFilesProcessor filesProcessor = new RifFilesProcessor();
    RifLoader rifLoader = new RifLoader(options, PipelineTestUtils.get().getPipelineApplicationState());

    // Feed every file's records straight from the processor into the loader.
    for (RifFileEvent fileEvent : filesEvent.getFileEvents()) {
        rifLoader.process(
                filesProcessor.produceRecords(fileEvent),
                error -> {
                },
                result -> {
                });
    }
}
Also used : LoadAppOptions(gov.cms.bfd.pipeline.ccw.rif.load.LoadAppOptions) StaticRifResource(gov.cms.bfd.model.rif.samples.StaticRifResource) RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords) RifFilesEvent(gov.cms.bfd.model.rif.RifFilesEvent) RifFilesProcessor(gov.cms.bfd.pipeline.ccw.rif.extract.RifFilesProcessor) RifLoader(gov.cms.bfd.pipeline.ccw.rif.load.RifLoader)

Example 4 with RifLoader

use of gov.cms.bfd.pipeline.ccw.rif.load.RifLoader in project beneficiary-fhir-data by CMSgov.

In the class ServerTestUtils, the method loadData.

/**
 * Loads the given sample RIF resources by pushing each of their file events through a {@link
 * RifFilesProcessor} and a {@link RifLoader}, collecting every record the loader reports as
 * loaded.
 *
 * <p>Load errors are logged at WARN level and do not stop the remaining files from being
 * processed by this method.
 *
 * @param sampleResources the sample RIF resources to load
 * @return the {@link List} of RIF records that were loaded (e.g. {@link Beneficiary}s, etc.)
 */
public List<Object> loadData(List<StaticRifResource> sampleResources) {
    LoadAppOptions loadOptions = CcwRifLoadTestUtils.getLoadOptions();
    RifFilesEvent rifFilesEvent = new RifFilesEvent(Instant.now(), sampleResources.stream().map(r -> r.toRifFile()).collect(Collectors.toList()));
    // Create the processors that will handle each stage of the pipeline.
    RifFilesProcessor processor = new RifFilesProcessor();
    RifLoader loader = new RifLoader(loadOptions, PipelineTestUtils.get().getPipelineApplicationState());
    LOGGER.info("Loading RIF records...");
    /*
     * NOTE(review): the loader's callbacks appear to run on the loader's own task threads
     * (possibly several background ones) -- confirm against RifLoader.process. A plain
     * ArrayList is not safe for concurrent add() calls, so results are collected through a
     * synchronized wrapper.
     */
    List<Object> recordsLoaded = java.util.Collections.synchronizedList(new ArrayList<>());
    // Link up the pipeline and run it.
    for (RifFileEvent rifFileEvent : rifFilesEvent.getFileEvents()) {
        RifFileRecords rifFileRecords = processor.produceRecords(rifFileEvent);
        loader.process(rifFileRecords, error -> {
            LOGGER.warn("Record(s) failed to load.", error);
        }, result -> {
            recordsLoaded.add(result.getRifRecordEvent().getRecord());
        });
    }
    LOGGER.info("Loaded RIF records: '{}'.", recordsLoaded.size());
    // Hand callers an ordinary ArrayList snapshot, as before, rather than the synchronized view.
    return new ArrayList<>(recordsLoaded);
}
Also used : LoadAppOptions(gov.cms.bfd.pipeline.ccw.rif.load.LoadAppOptions) ArrayList(java.util.ArrayList) RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords) RifFilesEvent(gov.cms.bfd.model.rif.RifFilesEvent) RifFilesProcessor(gov.cms.bfd.pipeline.ccw.rif.extract.RifFilesProcessor) RifLoader(gov.cms.bfd.pipeline.ccw.rif.load.RifLoader)

Aggregations

RifFilesProcessor (gov.cms.bfd.pipeline.ccw.rif.extract.RifFilesProcessor)4 RifLoader (gov.cms.bfd.pipeline.ccw.rif.load.RifLoader)4 RifFileEvent (gov.cms.bfd.model.rif.RifFileEvent)3 RifFileRecords (gov.cms.bfd.model.rif.RifFileRecords)3 RifFilesEvent (gov.cms.bfd.model.rif.RifFilesEvent)3 CcwRifLoadJob (gov.cms.bfd.pipeline.ccw.rif.CcwRifLoadJob)2 DataSetMonitorListener (gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetMonitorListener)2 LoadAppOptions (gov.cms.bfd.pipeline.ccw.rif.load.LoadAppOptions)2 MetricRegistry (com.codahale.metrics.MetricRegistry)1 Slf4jReporter (com.codahale.metrics.Slf4jReporter)1 Timer (com.codahale.metrics.Timer)1 StaticRifResource (gov.cms.bfd.model.rif.samples.StaticRifResource)1 S3TaskManager (gov.cms.bfd.pipeline.ccw.rif.extract.s3.task.S3TaskManager)1 RifRecordLoadResult (gov.cms.bfd.pipeline.ccw.rif.load.RifRecordLoadResult)1 ArrayList (java.util.ArrayList)1 TimeUnit (java.util.concurrent.TimeUnit)1 Consumer (java.util.function.Consumer)1 Logger (org.slf4j.Logger)1 LoggerFactory (org.slf4j.LoggerFactory)1