use of gov.cms.bfd.model.rif.RifFileEvent in project beneficiary-fhir-data by CMSgov.
the class RifLoaderIT method loadSample.
/**
 * Pushes every file of the supplied {@link RifFilesEvent} through a {@link RifFilesProcessor} and
 * a {@link RifLoader}, logging the collected metrics along the way, and fails the calling test if
 * the loader reported any error.
 *
 * @param sampleName a human-friendly label for this load; only used in log messages
 * @param options the {@link LoadAppOptions} handed to the {@link RifLoader}
 * @param rifFilesEvent the {@link RifFilesEvent} whose files are to be loaded
 * @return the number of times the loader's result callback fired
 */
private int loadSample(String sampleName, LoadAppOptions options, RifFilesEvent rifFilesEvent) {
  LOGGER.info("Loading RIF files: '{}'...", sampleName);

  // Stage 1 parses the RIF/CSV content, stage 2 pushes the parsed records to the loader.
  RifFilesProcessor rifParser = new RifFilesProcessor();
  RifLoader rifLoader =
      new RifLoader(options, PipelineTestUtils.get().getPipelineApplicationState());

  LOGGER.info("Loading RIF records...");

  // NOTE(review): atomics are presumably used because the callbacks may fire on loader threads.
  AtomicInteger errorTally = new AtomicInteger();
  AtomicInteger successTally = new AtomicInteger();

  for (RifFileEvent fileEvent : rifFilesEvent.getFileEvents()) {
    RifFileRecords parsedRecords = rifParser.produceRecords(fileEvent);
    rifLoader.process(
        parsedRecords,
        error -> {
          errorTally.incrementAndGet();
          LOGGER.warn("Record(s) failed to load.", error);
        },
        result -> successTally.incrementAndGet());

    // Dump the per-file metrics once the file has been handed to the loader.
    Slf4jReporter.forRegistry(fileEvent.getEventMetrics())
        .outputTo(LOGGER)
        .build()
        .report();
  }

  LOGGER.info("Loaded RIF files: '{}', record count: '{}'.", sampleName, successTally.get());

  // Dump the application-wide metrics as well.
  Slf4jReporter.forRegistry(PipelineTestUtils.get().getPipelineApplicationState().getMetrics())
      .outputTo(LOGGER)
      .build()
      .report();

  // Any reported failure invalidates the load.
  assertEquals(0, errorTally.get(), "Load errors encountered.");
  return successTally.get();
}
use of gov.cms.bfd.model.rif.RifFileEvent in project beneficiary-fhir-data by CMSgov.
the class RifFilesProcessor method produceRecords.
/**
 * Builds a lazily-evaluated stream of {@link RifRecordEvent}s from the CSV content of the
 * specified {@link RifFileEvent}.
 *
 * <p>Nothing is parsed until the returned stream is consumed. The underlying {@link CSVParser}
 * (and the Reader/InputStream it consumes) is only closed by the stream's close handler.
 *
 * <p>The file type is validated <em>before</em> the {@link CSVParser} is opened, so that an
 * unsupported file type does not leave an open parser behind.
 *
 * @param rifFileEvent the {@link RifFileEvent} that is being processed
 * @return a {@link RifFileRecords} with the {@link RifRecordEvent}s produced from the specified
 *     {@link RifFileEvent}
 * @throws UnsupportedRifFileTypeException if the file's {@link RifFileType} has no record parser
 */
public RifFileRecords produceRecords(RifFileEvent rifFileEvent) {
  RifFile file = rifFileEvent.getFile();
  RifFileType fileType = file.getFileType();

  /*
   * Pick the record builder for this file type, and note whether several CSV rows make up one
   * record (claims with lines) or each row stands on its own.
   */
  boolean isGrouped;
  BiFunction<RifFileEvent, List<CSVRecord>, RifRecordEvent<?>> recordParser;
  if (fileType == RifFileType.BENEFICIARY) {
    isGrouped = false;
    recordParser = RifFilesProcessor::buildBeneficiaryEvent;
  } else if (fileType == RifFileType.BENEFICIARY_HISTORY) {
    isGrouped = false;
    recordParser = RifFilesProcessor::buildBeneficiaryHistoryEvent;
  } else if (fileType == RifFileType.MEDICARE_BENEFICIARY_ID_HISTORY) {
    isGrouped = false;
    recordParser = RifFilesProcessor::buildMedicareBeneficiaryIdHistoryEvent;
  } else if (fileType == RifFileType.PDE) {
    isGrouped = false;
    recordParser = RifFilesProcessor::buildPartDEvent;
  } else if (fileType == RifFileType.CARRIER) {
    isGrouped = true;
    recordParser = RifFilesProcessor::buildCarrierClaimEvent;
  } else if (fileType == RifFileType.INPATIENT) {
    isGrouped = true;
    recordParser = RifFilesProcessor::buildInpatientClaimEvent;
  } else if (fileType == RifFileType.OUTPATIENT) {
    isGrouped = true;
    recordParser = RifFilesProcessor::buildOutpatientClaimEvent;
  } else if (fileType == RifFileType.SNF) {
    isGrouped = true;
    recordParser = RifFilesProcessor::buildSNFClaimEvent;
  } else if (fileType == RifFileType.HOSPICE) {
    isGrouped = true;
    recordParser = RifFilesProcessor::buildHospiceClaimEvent;
  } else if (fileType == RifFileType.HHA) {
    isGrouped = true;
    recordParser = RifFilesProcessor::buildHHAClaimEvent;
  } else if (fileType == RifFileType.DME) {
    isGrouped = true;
    recordParser = RifFilesProcessor::buildDMEClaimEvent;
  } else {
    // No parser has been opened yet, so there is nothing to clean up on this path.
    throw new UnsupportedRifFileTypeException("Unsupported file type:" + fileType);
  }

  /*
   * Approach used here to parse CSV as a Java 8 Stream is courtesy of
   * https://rumianom.pl/rumianom/entry/apache-commons-csv-with-java.
   */
  CSVParser parser = RifParsingUtils.createCsvParser(file);

  /*
   * Use the CSVParser to drive a Stream of grouped CSVRecords
   * (specifically, group by claim ID/lines).
   */
  CsvRecordGrouper grouper =
      new ColumnValueCsvRecordGrouper(isGrouped ? fileType.getIdColumn() : null);
  Iterator<List<CSVRecord>> csvIterator = new CsvRecordGroupingIterator(parser, grouper);
  Spliterator<List<CSVRecord>> spliterator =
      Spliterators.spliteratorUnknownSize(csvIterator, Spliterator.ORDERED | Spliterator.NONNULL);
  Stream<List<CSVRecord>> csvRecordStream =
      StreamSupport.stream(spliterator, false)
          .onClose(
              () -> {
                try {
                  /*
                   * This will also close the Reader and InputStream that the
                   * CSVParser was consuming.
                   */
                  parser.close();
                } catch (IOException e) {
                  LOGGER.warn("Unable to close CSVParser", e);
                }
              });

  /* Map each record group to a single RifRecordEvent. */
  Stream<RifRecordEvent<?>> rifRecordStream =
      csvRecordStream.map(
          csvRecordGroup -> {
            try {
              // Only successfully parsed groups are recorded in the "recordParsing" timer.
              Timer.Context parsingTimer =
                  rifFileEvent
                      .getEventMetrics()
                      .timer(MetricRegistry.name(getClass().getSimpleName(), "recordParsing"))
                      .time();
              RifRecordEvent<?> recordEvent = recordParser.apply(rifFileEvent, csvRecordGroup);
              parsingTimer.close();
              return recordEvent;
            } catch (InvalidRifValueException e) {
              // Log where the bad value was found, then rethrow with the original as the cause.
              LOGGER.warn(
                  "Parse error encountered near line number '{}'.",
                  csvRecordGroup.get(0).getRecordNumber());
              throw new InvalidRifValueException(e);
            }
          });

  return new RifFileRecords(rifFileEvent, rifRecordStream);
}
use of gov.cms.bfd.model.rif.RifFileEvent in project beneficiary-fhir-data by CMSgov.
the class DefaultDataSetMonitorListener method dataAvailable.
/**
 * Runs each file of the supplied {@link RifFilesEvent} through the RIF processor and then the RIF
 * loader, periodically logging the file's metrics while it is being worked on.
 *
 * <p>The periodic metrics reporter of a file is always stopped once that file is done, even when
 * processing throws, so that no reporter is left running in the background.
 *
 * @param rifFilesEvent the {@link RifFilesEvent} whose files are to be processed and loaded
 * @see
 *     gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetMonitorListener#dataAvailable(gov.cms.bfd.model.rif.RifFilesEvent)
 */
@Override
public void dataAvailable(RifFilesEvent rifFilesEvent) {
  Timer.Context timerDataSet =
      appMetrics
          .timer(
              MetricRegistry.name(
                  PipelineApplication.class.getSimpleName(), "dataSet", "processed"))
          .time();

  Consumer<Throwable> errorHandler =
      error -> {
        /*
         * This will be called on the same thread used to run each
         * RifLoader task (probably a background one). This is not
         * the right place to do any error _recovery_ (that'd have
         * to be inside RifLoader itself), but it is likely the
         * right place to decide when/if a failure is "bad enough"
         * that the rest of processing should be stopped. Right now
         * we stop that way for _any_ failure, but we probably want
         * to be more discriminating than that.
         */
        errorOccurred(error);
      };

  Consumer<RifRecordLoadResult> resultHandler =
      result -> {
        /*
         * Don't really *need* to do anything here. The RifLoader
         * already records metrics for each data set.
         */
      };

  /*
   * Each ETL stage produces a stream that will be handed off to
   * and processed by the next stage.
   */
  for (RifFileEvent rifFileEvent : rifFilesEvent.getFileEvents()) {
    Slf4jReporter dataSetFileMetricsReporter =
        Slf4jReporter.forRegistry(rifFileEvent.getEventMetrics()).outputTo(LOGGER).build();
    dataSetFileMetricsReporter.start(2, TimeUnit.MINUTES);
    try {
      RifFileRecords rifFileRecords = rifProcessor.produceRecords(rifFileEvent);
      rifLoader.process(rifFileRecords, errorHandler, resultHandler);
    } finally {
      // Stop the periodic reporting even on failure; otherwise it would keep logging forever.
      dataSetFileMetricsReporter.stop();
    }
    // Emit one final report with the complete metrics of the file.
    dataSetFileMetricsReporter.report();
  }

  // Only reached (and hence only recorded) when every file was processed without an exception.
  timerDataSet.stop();
}
use of gov.cms.bfd.model.rif.RifFileEvent in project beneficiary-fhir-data by CMSgov.
the class LoadedFilterManagerIT method loadData.
/**
 * Loads the given sample RIF resources by running them through a {@link RifFilesProcessor} and a
 * {@link RifLoader}.
 *
 * <p>NOTE(review): load errors and load results are both ignored here; a failing load will go
 * unnoticed by the caller.
 *
 * @param dataSource not used by this method
 * @param sampleResources the sample RIF resources to load
 */
private static void loadData(DataSource dataSource, List<StaticRifResource> sampleResources) {
  LoadAppOptions options = CcwRifLoadTestUtils.getLoadOptions();

  // Wrap the sample resources in a single event, timestamped "now".
  RifFilesEvent filesEvent =
      new RifFilesEvent(
          Instant.now(),
          sampleResources.stream()
              .map(resource -> resource.toRifFile())
              .collect(Collectors.toList()));

  // Stage 1 parses the RIF content, stage 2 loads the parsed records.
  RifFilesProcessor rifParser = new RifFilesProcessor();
  RifLoader rifLoader =
      new RifLoader(options, PipelineTestUtils.get().getPipelineApplicationState());

  // Feed each file through both stages, in order.
  for (RifFileEvent fileEvent : filesEvent.getFileEvents()) {
    RifFileRecords parsedRecords = rifParser.produceRecords(fileEvent);
    rifLoader.process(parsedRecords, error -> {}, result -> {});
  }
}
use of gov.cms.bfd.model.rif.RifFileEvent in project beneficiary-fhir-data by CMSgov.
the class ServerTestUtils method loadData.
/**
 * Loads the given sample RIF resources by running them through a {@link RifFilesProcessor} and a
 * {@link RifLoader}, collecting the record of every reported load result.
 *
 * <p>Load errors are logged as warnings but do not abort the load or fail the caller.
 *
 * @param sampleResources the sample RIF resources to load
 * @return the {@link List} of RIF records that were loaded (e.g. {@link Beneficiary}s, etc.)
 */
public List<Object> loadData(List<StaticRifResource> sampleResources) {
  LoadAppOptions loadOptions = CcwRifLoadTestUtils.getLoadOptions();
  RifFilesEvent rifFilesEvent =
      new RifFilesEvent(
          Instant.now(),
          sampleResources.stream()
              .map(StaticRifResource::toRifFile)
              .collect(Collectors.toList()));

  // Create the processors that will handle each stage of the pipeline.
  RifFilesProcessor processor = new RifFilesProcessor();

  // Link up the pipeline and run it.
  RifLoader loader =
      new RifLoader(loadOptions, PipelineTestUtils.get().getPipelineApplicationState());
  LOGGER.info("Loading RIF records...");

  /*
   * The result callback may be invoked from the loader's worker threads, so the records are
   * collected into a synchronized list rather than a bare ArrayList.
   */
  List<Object> recordsLoaded = java.util.Collections.synchronizedList(new ArrayList<>());
  for (RifFileEvent rifFileEvent : rifFilesEvent.getFileEvents()) {
    RifFileRecords rifFileRecords = processor.produceRecords(rifFileEvent);
    loader.process(
        rifFileRecords,
        error -> LOGGER.warn("Record(s) failed to load.", error),
        result -> recordsLoaded.add(result.getRifRecordEvent().getRecord()));
  }
  LOGGER.info("Loaded RIF records: '{}'.", recordsLoaded.size());

  // Hand back a plain, unsynchronized snapshot, as callers received before.
  return new ArrayList<>(recordsLoaded);
}
Aggregations