Search in sources :

Example 1 with RifFile

use of gov.cms.bfd.model.rif.RifFile in project beneficiary-fhir-data by CMSgov.

From the class DataSetSubsetter, method downloadDataSet.

/**
 * Downloads the specified data set's manifest and RIF files from S3 into the specified local
 * directory, skipping any files that are already present there (acting as a simple cache).
 *
 * @param options the {@link ExtractionOptions} to use
 * @param dataSetS3KeyPrefix the S3 key prefix (i.e. directory) of the data set to download
 * @param downloadDirectory the Path to the directory to download the RIF files locally to
 * @return the {@link RifFile}s that comprise the full 1M beneficiary dummy data set
 */
private static List<RifFile> downloadDataSet(ExtractionOptions options, String dataSetS3KeyPrefix, Path downloadDirectory) {
    AmazonS3 s3Client = S3Utilities.createS3Client(options);
    TransferManager transferManager = TransferManagerBuilder.standard().withS3Client(s3Client).build();
    try {
        String dataSetPrefix = "data-random/" + dataSetS3KeyPrefix;
        String manifestSuffix = "1_manifest.xml";

        // Download the manifest first (unless it's already cached locally).
        Path manifestDownloadPath = downloadDirectory.resolve(manifestSuffix);
        if (!Files.exists(manifestDownloadPath)) {
            String manifestKey = String.format("%s/%s", dataSetPrefix, manifestSuffix);
            Download manifestDownload = transferManager.download(options.getS3BucketName(), manifestKey, manifestDownloadPath.toFile());
            waitForDownload(manifestDownload);
        }
        LOGGER.info("Manifest downloaded.");

        // Parse the manifest to discover which RIF files make up the data set.
        DataSetManifest dummyDataSetManifest;
        try {
            JAXBContext jaxbContext = JAXBContext.newInstance(DataSetManifest.class);
            Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
            dummyDataSetManifest = (DataSetManifest) jaxbUnmarshaller.unmarshal(manifestDownloadPath.toFile());
        } catch (JAXBException e) {
            throw new UncheckedJaxbException(e);
        }

        // Download each RIF file listed in the manifest (again skipping cached files).
        List<RifFile> rifFiles = new ArrayList<>();
        for (DataSetManifestEntry manifestEntry : dummyDataSetManifest.getEntries()) {
            String dataSetFileKey = String.format("%s/%s", dataSetPrefix, manifestEntry.getName());
            Path dataSetFileDownloadPath = downloadDirectory.resolve(manifestEntry.getName());
            if (!Files.exists(dataSetFileDownloadPath)) {
                LOGGER.info("Downloading RIF file: '{}'...", manifestEntry.getName());
                Download dataSetFileDownload = transferManager.download(options.getS3BucketName(), dataSetFileKey, dataSetFileDownloadPath.toFile());
                waitForDownload(dataSetFileDownload);
            }
            RifFile dataSetFile = new LocalRifFile(dataSetFileDownloadPath, manifestEntry.getType());
            rifFiles.add(dataSetFile);
        }
        LOGGER.info("Original RIF files ready.");
        return rifFiles;
    } finally {
        // Always release the TransferManager's thread pool, even when a download or
        // manifest parse fails; otherwise those non-daemon threads keep the JVM alive.
        transferManager.shutdownNow();
    }
}

/**
 * Blocks until the specified {@link Download} has completed.
 *
 * @param download the in-progress {@link Download} to wait for
 * @throws RuntimeException if the download fails or the wait is interrupted
 */
private static void waitForDownload(Download download) {
    try {
        download.waitForCompletion();
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers up the stack can still observe it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (AmazonClientException e) {
        throw new RuntimeException(e);
    }
}
Also used : Path(java.nio.file.Path) AmazonS3(com.amazonaws.services.s3.AmazonS3) TransferManager(com.amazonaws.services.s3.transfer.TransferManager) S3RifFile(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3RifFile) RifFile(gov.cms.bfd.model.rif.RifFile) LocalRifFile(gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile) DataSetManifest(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest) LocalRifFile(gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile) AmazonClientException(com.amazonaws.AmazonClientException) JAXBException(javax.xml.bind.JAXBException) ArrayList(java.util.ArrayList) JAXBContext(javax.xml.bind.JAXBContext) UncheckedJaxbException(gov.cms.bfd.sharedutils.exceptions.UncheckedJaxbException) Unmarshaller(javax.xml.bind.Unmarshaller) Download(com.amazonaws.services.s3.transfer.Download) DataSetManifestEntry(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry)

Example 2 with RifFile

use of gov.cms.bfd.model.rif.RifFile in project beneficiary-fhir-data by CMSgov.

From the class DataSetSubsetter, method createSubset.

/**
 * Creates a subset of the specified input {@link RifFile}s, writing out the results via the
 * {@link CSVPrinter}s provided by the specified {@link IDataSetWriter}.
 *
 * @param output the {@link IDataSetWriter} to get the needed {@link CSVPrinter}s from
 * @param beneficiaryCount the target beneficiary count of the copy/subset to create
 * @param rifFiles the input {@link RifFile}s to be subsetted
 * @throws IOException Any {@link IOException}s encountered will be bubbled up.
 */
public static void createSubset(IDataSetWriter output, int beneficiaryCount, List<RifFile> rifFiles) throws IOException {
    LOGGER.info("Scanning beneficiary IDs...");
    List<RifFile> beneficiaryFiles = rifFiles.stream().filter(f -> f.getFileType() == RifFileType.BENEFICIARY).collect(Collectors.toList());
    List<String> beneficiaryIds = new ArrayList<>();
    // Track IDs in a Set so the duplicate check is O(1) per record, not an O(n) List scan.
    Set<String> seenBeneficiaryIds = new HashSet<>();
    for (RifFile beneficiaryFile : beneficiaryFiles) {
        // try-with-resources ensures the parser is closed even if a record throws.
        try (CSVParser parser = RifParsingUtils.createCsvParser(beneficiaryFile)) {
            parser.forEach(r -> {
                String beneficiaryId = r.get(BeneficiaryColumn.BENE_ID);
                // Duplicate beneficiary IDs indicate corrupt source data: fail fast.
                if (!seenBeneficiaryIds.add(beneficiaryId))
                    throw new IllegalStateException();
                beneficiaryIds.add(beneficiaryId);
            });
        }
    }
    LOGGER.info("Scanned beneficiary IDs.");

    // Pick a random subset of the requested size by shuffling and taking a prefix.
    Set<String> selectedBeneficiaryIds = new HashSet<>(beneficiaryCount);
    Collections.shuffle(beneficiaryIds);
    for (int i = 0; i < beneficiaryCount; i++) selectedBeneficiaryIds.add(beneficiaryIds.get(i));
    LOGGER.info("Selected '{}' random beneficiary IDs.", beneficiaryCount);

    // Each RIF file type stores the beneficiary ID in a differently-named column.
    Map<RifFileType, Enum<?>> beneficiaryColumnByFileType = new HashMap<>();
    beneficiaryColumnByFileType.put(RifFileType.BENEFICIARY, BeneficiaryColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.CARRIER, CarrierClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.DME, DMEClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.HHA, HHAClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.HOSPICE, HospiceClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.INPATIENT, InpatientClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.OUTPATIENT, OutpatientClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.PDE, PartDEventColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.SNF, SNFClaimColumn.BENE_ID);
    for (RifFile rifFile : rifFiles) {
        LOGGER.info("Subsetting RIF file: '{}'...", rifFile.getDisplayName());
        CSVPrinter rifFilePrinter = output.getPrinter(rifFile.getFileType());
        // try-with-resources: the original code leaked this parser (it was never closed).
        try (CSVParser parser = RifParsingUtils.createCsvParser(rifFile)) {
            /*
             * When we created the CSVPrinter, we told it to skip the header.
             * That ensures that we don't write out a header until we've started
             * reading the file and know what it is. Here, we print a "fake"
             * first record with the header, as read from the input file.
             * Previously, we'd been having the CSVPrinter create a header based
             * on our RIF column enums, but that leads to us propagating errors
             * in those enums to the sample files. It's better to let the files
             * tell us what their headers are.
             */
            rifFilePrinter.printRecord(parser.getHeaderMap().entrySet().stream().sorted(Map.Entry.comparingByValue()).map(e -> e.getKey()).toArray());
            parser.forEach(r -> {
                // Copy only the records that belong to a selected beneficiary.
                String beneficiaryId = r.get(beneficiaryColumnByFileType.get(rifFile.getFileType()));
                if (selectedBeneficiaryIds.contains(beneficiaryId))
                    try {
                        rifFilePrinter.printRecord(r);
                    } catch (IOException e) {
                        throw new UncheckedIOException(e);
                    }
            });
        }
    }
    LOGGER.info("Subsetted all RIF files.");
}
Also used : Arrays(java.util.Arrays) CarrierClaimColumn(gov.cms.bfd.model.rif.CarrierClaimColumn) RifFileType(gov.cms.bfd.model.rif.RifFileType) S3RifFile(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3RifFile) S3Utilities(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3Utilities) LoggerFactory(org.slf4j.LoggerFactory) SNFClaimColumn(gov.cms.bfd.model.rif.SNFClaimColumn) HHAClaimColumn(gov.cms.bfd.model.rif.HHAClaimColumn) CSVFormat(org.apache.commons.csv.CSVFormat) Map(java.util.Map) CSVParser(org.apache.commons.csv.CSVParser) Path(java.nio.file.Path) TransferManagerBuilder(com.amazonaws.services.s3.transfer.TransferManagerBuilder) InpatientClaimColumn(gov.cms.bfd.model.rif.InpatientClaimColumn) Set(java.util.Set) RifFile(gov.cms.bfd.model.rif.RifFile) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) JAXBException(javax.xml.bind.JAXBException) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) UncheckedJaxbException(gov.cms.bfd.sharedutils.exceptions.UncheckedJaxbException) Entry(java.util.Map.Entry) RifParsingUtils(gov.cms.bfd.model.rif.parse.RifParsingUtils) AmazonClientException(com.amazonaws.AmazonClientException) BeneficiaryColumn(gov.cms.bfd.model.rif.BeneficiaryColumn) CSVPrinter(org.apache.commons.csv.CSVPrinter) TransferManager(com.amazonaws.services.s3.transfer.TransferManager) LocalRifFile(gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile) OutpatientClaimColumn(gov.cms.bfd.model.rif.OutpatientClaimColumn) HospiceClaimColumn(gov.cms.bfd.model.rif.HospiceClaimColumn) Marshaller(javax.xml.bind.Marshaller) HashMap(java.util.HashMap) ExtractionOptions(gov.cms.bfd.pipeline.ccw.rif.extract.ExtractionOptions) Download(com.amazonaws.services.s3.transfer.Download) ArrayList(java.util.ArrayList) DataSetManifestEntry(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry) HashSet(java.util.HashSet) TestDataSetLocation(gov.cms.bfd.model.rif.samples.TestDataSetLocation) AmazonS3(com.amazonaws.services.s3.AmazonS3) 
PartDEventColumn(gov.cms.bfd.model.rif.PartDEventColumn) JAXBContext(javax.xml.bind.JAXBContext) Unmarshaller(javax.xml.bind.Unmarshaller) Logger(org.slf4j.Logger) Files(java.nio.file.Files) FileWriter(java.io.FileWriter) IOException(java.io.IOException) DataSetManifest(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest) Paths(java.nio.file.Paths) DMEClaimColumn(gov.cms.bfd.model.rif.DMEClaimColumn) Collections(java.util.Collections) S3RifFile(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3RifFile) RifFile(gov.cms.bfd.model.rif.RifFile) LocalRifFile(gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) UncheckedIOException(java.io.UncheckedIOException) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) RifFileType(gov.cms.bfd.model.rif.RifFileType) CSVPrinter(org.apache.commons.csv.CSVPrinter) CSVParser(org.apache.commons.csv.CSVParser) HashSet(java.util.HashSet)

Example 3 with RifFile

use of gov.cms.bfd.model.rif.RifFile in project beneficiary-fhir-data by CMSgov.

From the class RifFilesProcessor, method produceRecords.

/**
 * Lazily parses the specified {@link RifFileEvent}'s file into a stream of record events. The
 * returned stream does not read the file until consumed, and closes the underlying parser when
 * the stream itself is closed.
 *
 * @param rifFileEvent the {@link RifFileEvent} that is being processed
 * @return a {@link RifFileRecords} with the {@link RifRecordEvent}s produced from the specified
 *     {@link RifFileEvent}
 */
public RifFileRecords produceRecords(RifFileEvent rifFileEvent) {
    RifFile file = rifFileEvent.getFile();
    /*
     * Approach used here to parse CSV as a Java 8 Stream is courtesy of
     * https://rumianom.pl/rumianom/entry/apache-commons-csv-with-java.
     */
    CSVParser parser = RifParsingUtils.createCsvParser(file);
    boolean isGrouped;
    BiFunction<RifFileEvent, List<CSVRecord>, RifRecordEvent<?>> recordParser;
    /*
     * Select the parser for this file type. Claim files are "grouped": a single claim
     * spans multiple CSV rows (one per claim line) that must be parsed together.
     */
    RifFileType fileType = file.getFileType();
    switch (fileType) {
        case BENEFICIARY:
            isGrouped = false;
            recordParser = RifFilesProcessor::buildBeneficiaryEvent;
            break;
        case BENEFICIARY_HISTORY:
            isGrouped = false;
            recordParser = RifFilesProcessor::buildBeneficiaryHistoryEvent;
            break;
        case MEDICARE_BENEFICIARY_ID_HISTORY:
            isGrouped = false;
            recordParser = RifFilesProcessor::buildMedicareBeneficiaryIdHistoryEvent;
            break;
        case PDE:
            isGrouped = false;
            recordParser = RifFilesProcessor::buildPartDEvent;
            break;
        case CARRIER:
            isGrouped = true;
            recordParser = RifFilesProcessor::buildCarrierClaimEvent;
            break;
        case INPATIENT:
            isGrouped = true;
            recordParser = RifFilesProcessor::buildInpatientClaimEvent;
            break;
        case OUTPATIENT:
            isGrouped = true;
            recordParser = RifFilesProcessor::buildOutpatientClaimEvent;
            break;
        case SNF:
            isGrouped = true;
            recordParser = RifFilesProcessor::buildSNFClaimEvent;
            break;
        case HOSPICE:
            isGrouped = true;
            recordParser = RifFilesProcessor::buildHospiceClaimEvent;
            break;
        case HHA:
            isGrouped = true;
            recordParser = RifFilesProcessor::buildHHAClaimEvent;
            break;
        case DME:
            isGrouped = true;
            recordParser = RifFilesProcessor::buildDMEClaimEvent;
            break;
        default:
            throw new UnsupportedRifFileTypeException("Unsupported file type:" + file.getFileType());
    }
    /*
     * Use the CSVParser to drive a Stream of grouped CSVRecords
     * (specifically, group by claim ID/lines).
     */
    CsvRecordGrouper grouper = new ColumnValueCsvRecordGrouper(isGrouped ? fileType.getIdColumn() : null);
    Iterator<List<CSVRecord>> csvIterator = new CsvRecordGroupingIterator(parser, grouper);
    Spliterator<List<CSVRecord>> spliterator = Spliterators.spliteratorUnknownSize(csvIterator, Spliterator.ORDERED | Spliterator.NONNULL);
    Stream<List<CSVRecord>> csvRecordStream = StreamSupport.stream(spliterator, false).onClose(() -> {
        try {
            /*
             * This will also close the Reader and InputStream that the
             * CSVParser was consuming.
             */
            parser.close();
        } catch (IOException e) {
            LOGGER.warn("Unable to close CSVParser", e);
        }
    });
    /* Map each record group to a single RifRecordEvent. */
    Stream<RifRecordEvent<?>> rifRecordStream = csvRecordStream.map(csvRecordGroup -> {
        try {
            Timer.Context parsingTimer = rifFileEvent.getEventMetrics().timer(MetricRegistry.name(getClass().getSimpleName(), "recordParsing")).time();
            try {
                // Close the timer in a finally block so failed parses don't leak it
                // (previously it was only closed on the success path).
                return recordParser.apply(rifFileEvent, csvRecordGroup);
            } finally {
                parsingTimer.close();
            }
        } catch (InvalidRifValueException e) {
            LOGGER.warn("Parse error encountered near line number '{}'.", csvRecordGroup.get(0).getRecordNumber());
            throw new InvalidRifValueException(e);
        }
    });
    return new RifFileRecords(rifFileEvent, rifRecordStream);
}
Also used : CsvRecordGrouper(gov.cms.bfd.pipeline.ccw.rif.extract.CsvRecordGroupingIterator.CsvRecordGrouper) ColumnValueCsvRecordGrouper(gov.cms.bfd.pipeline.ccw.rif.extract.CsvRecordGroupingIterator.ColumnValueCsvRecordGrouper) RifFile(gov.cms.bfd.model.rif.RifFile) RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) RifRecordEvent(gov.cms.bfd.model.rif.RifRecordEvent) ColumnValueCsvRecordGrouper(gov.cms.bfd.pipeline.ccw.rif.extract.CsvRecordGroupingIterator.ColumnValueCsvRecordGrouper) IOException(java.io.IOException) Timer(com.codahale.metrics.Timer) InvalidRifValueException(gov.cms.bfd.model.rif.parse.InvalidRifValueException) CSVParser(org.apache.commons.csv.CSVParser) List(java.util.List) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords) UnsupportedRifFileTypeException(gov.cms.bfd.pipeline.ccw.rif.extract.exceptions.UnsupportedRifFileTypeException)

Example 4 with RifFile

use of gov.cms.bfd.model.rif.RifFile in project beneficiary-fhir-data by CMSgov.

From the class DataSetSubsetter, method main.

/**
 * The application entry point that can be used to run the {@link DataSetSubsetter}.
 *
 * @param args (not used)
 * @throws Exception Any exceptions thrown will be bubbled up, terminating the app.
 */
public static void main(String[] args) throws Exception {
    /*
     * From the original source data set of 1M beneficiaries and their
     * claims, create subsets going all the way down by powers of ten. This
     * gives test authors lots of good options for how much data to test
     * against. Note that on Karl's `jordan-u` system, this took 5.5h to
     * run.
     */
    for (int beneCount = 1000000; beneCount >= 10; beneCount /= 10) {
        // Grab the source and target constants.
        final int sourceBeneCount = beneCount;
        final int targetBeneCount = beneCount / 10;
        // The name patterns are plain literals, so an exact equals comparison suffices;
        // fail with a descriptive error if a data set constant is missing (rather than
        // the bare NoSuchElementException that an unchecked Optional.get() would throw).
        TestDataSetLocation sourceDataSet = Arrays.stream(TestDataSetLocation.class.getEnumConstants()).filter(c -> c.name().equals("DUMMY_DATA_" + sourceBeneCount + "_BENES")).findAny().orElseThrow(() -> new IllegalStateException("No test data set constant found for " + sourceBeneCount + " beneficiaries."));
        TestDataSetLocation targetDataSet = Arrays.stream(TestDataSetLocation.class.getEnumConstants()).filter(c -> c.name().equals("DUMMY_DATA_" + targetBeneCount + "_BENES")).findAny().orElseThrow(() -> new IllegalStateException("No test data set constant found for " + targetBeneCount + " beneficiaries."));
        // Figure out what directories to store the source in locally.
        Path outputDirectory = Paths.get(".", "test-data-random");
        Files.createDirectories(outputDirectory);
        // The data set ID is the last segment of the S3 key prefix.
        String[] sourceKeySegments = sourceDataSet.getS3KeyPrefix().split("/");
        String sourceDataSetId = sourceKeySegments[sourceKeySegments.length - 1];
        Path sourceDataSetDirectory = outputDirectory.resolve(sourceDataSetId);
        // Download the source data set and build the target from it.
        ExtractionOptions options = new ExtractionOptions(sourceDataSet.getS3BucketName());
        String[] targetKeySegments = targetDataSet.getS3KeyPrefix().split("/");
        String targetDataSetId = targetKeySegments[targetKeySegments.length - 1];
        Path targetDataSetDirectory = outputDirectory.resolve(targetDataSetId);
        Instant targetDataSetTimestamp = Instant.parse(targetDataSetId.replaceFirst("\\d+-beneficiaries-", ""));
        try (IDataSetWriter output = new LocalDataSetWriter(targetDataSetDirectory, targetDataSetTimestamp)) {
            Files.createDirectories(sourceDataSetDirectory);
            List<RifFile> rifFiles = downloadDataSet(options, sourceDataSetId, sourceDataSetDirectory);
            DataSetSubsetter.createSubset(output, targetBeneCount, rifFiles);
        }
    }
}
Also used : Path(java.nio.file.Path) Arrays(java.util.Arrays) CarrierClaimColumn(gov.cms.bfd.model.rif.CarrierClaimColumn) RifFileType(gov.cms.bfd.model.rif.RifFileType) S3RifFile(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3RifFile) S3Utilities(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3Utilities) LoggerFactory(org.slf4j.LoggerFactory) SNFClaimColumn(gov.cms.bfd.model.rif.SNFClaimColumn) HHAClaimColumn(gov.cms.bfd.model.rif.HHAClaimColumn) CSVFormat(org.apache.commons.csv.CSVFormat) Map(java.util.Map) CSVParser(org.apache.commons.csv.CSVParser) Path(java.nio.file.Path) TransferManagerBuilder(com.amazonaws.services.s3.transfer.TransferManagerBuilder) InpatientClaimColumn(gov.cms.bfd.model.rif.InpatientClaimColumn) Set(java.util.Set) RifFile(gov.cms.bfd.model.rif.RifFile) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) JAXBException(javax.xml.bind.JAXBException) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) UncheckedJaxbException(gov.cms.bfd.sharedutils.exceptions.UncheckedJaxbException) Entry(java.util.Map.Entry) RifParsingUtils(gov.cms.bfd.model.rif.parse.RifParsingUtils) AmazonClientException(com.amazonaws.AmazonClientException) BeneficiaryColumn(gov.cms.bfd.model.rif.BeneficiaryColumn) CSVPrinter(org.apache.commons.csv.CSVPrinter) TransferManager(com.amazonaws.services.s3.transfer.TransferManager) LocalRifFile(gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile) OutpatientClaimColumn(gov.cms.bfd.model.rif.OutpatientClaimColumn) HospiceClaimColumn(gov.cms.bfd.model.rif.HospiceClaimColumn) Marshaller(javax.xml.bind.Marshaller) HashMap(java.util.HashMap) ExtractionOptions(gov.cms.bfd.pipeline.ccw.rif.extract.ExtractionOptions) Download(com.amazonaws.services.s3.transfer.Download) ArrayList(java.util.ArrayList) DataSetManifestEntry(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry) HashSet(java.util.HashSet) TestDataSetLocation(gov.cms.bfd.model.rif.samples.TestDataSetLocation) 
AmazonS3(com.amazonaws.services.s3.AmazonS3) PartDEventColumn(gov.cms.bfd.model.rif.PartDEventColumn) JAXBContext(javax.xml.bind.JAXBContext) Unmarshaller(javax.xml.bind.Unmarshaller) Logger(org.slf4j.Logger) Files(java.nio.file.Files) FileWriter(java.io.FileWriter) IOException(java.io.IOException) DataSetManifest(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest) Paths(java.nio.file.Paths) DMEClaimColumn(gov.cms.bfd.model.rif.DMEClaimColumn) Collections(java.util.Collections) S3RifFile(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3RifFile) RifFile(gov.cms.bfd.model.rif.RifFile) LocalRifFile(gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile) TestDataSetLocation(gov.cms.bfd.model.rif.samples.TestDataSetLocation) Instant(java.time.Instant) ExtractionOptions(gov.cms.bfd.pipeline.ccw.rif.extract.ExtractionOptions)

Example 5 with RifFile

use of gov.cms.bfd.model.rif.RifFile in project beneficiary-fhir-data by CMSgov.

From the class RifLoaderIT, method failOnUpdateBeneficiaryBeforeInsert.

/**
 * Runs {@link RifLoader} against the {@link StaticRifResourceGroup#SAMPLE_A} data for an <code>
 * UPDATE</code> {@link Beneficiary} record that there hasn't been a previous <code>INSERT</code>
 * on, to verify that this fails as expected.
 */
@Test
public void failOnUpdateBeneficiaryBeforeInsert() {
    // Pull just the beneficiary file out of SAMPLE_A.
    Stream<RifFile> beneOnlySamples = filterSamples(r -> r.getFileType() == RifFileType.BENEFICIARY, StaticRifResourceGroup.SAMPLE_A.getResources());
    // Editor that flips the first CSV row's DML indicator (column 0) to an UPDATE.
    Function<RifRecordEvent<?>, List<List<String>>> updateEditor = recordEvent -> {
        CSVRecord firstCsvRow = recordEvent.getRawCsvRecords().get(0);
        List<String> editedValues = StreamSupport.stream(firstCsvRow.spliterator(), false).collect(Collectors.toList());
        editedValues.set(0, "UPDATE");
        return List.of(editedValues);
    };
    Stream<RifFile> tweakedSamples = editSamples(beneOnlySamples, sampleFile -> editSampleRecords(sampleFile, updateEditor));
    // Loading an UPDATE with no prior INSERT must fail with load errors.
    AssertionFailedError thrown = assertThrows(AssertionFailedError.class, () -> loadSample("SAMPLE_A, bene only, UPDATE", CcwRifLoadTestUtils.getLoadOptions(), tweakedSamples));
    assertTrue(thrown.getMessage().contains("Load errors encountered"));
}
Also used : BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) RifFileType(gov.cms.bfd.model.rif.RifFileType) StaticRifResource(gov.cms.bfd.model.rif.samples.StaticRifResource) CSVRecord(org.apache.commons.csv.CSVRecord) LoggerFactory(org.slf4j.LoggerFactory) IdHasher(gov.cms.bfd.pipeline.sharedutils.IdHasher) Disabled(org.junit.jupiter.api.Disabled) RifFilesProcessor(gov.cms.bfd.pipeline.ccw.rif.extract.RifFilesProcessor) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) CSVFormat(org.apache.commons.csv.CSVFormat) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AssertionFailedError(org.opentest4j.AssertionFailedError) CarrierClaimLine(gov.cms.bfd.model.rif.CarrierClaimLine) CriteriaBuilder(javax.persistence.criteria.CriteriaBuilder) BeneficiaryHistory(gov.cms.bfd.model.rif.BeneficiaryHistory) Path(java.nio.file.Path) CriteriaQuery(javax.persistence.criteria.CriteriaQuery) Predicate(java.util.function.Predicate) RifFileRecords(gov.cms.bfd.model.rif.RifFileRecords) RifFile(gov.cms.bfd.model.rif.RifFile) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) BeneficiaryMonthly(gov.cms.bfd.model.rif.BeneficiaryMonthly) TestInfo(org.junit.jupiter.api.TestInfo) RifFilesEvent(gov.cms.bfd.model.rif.RifFilesEvent) UncheckedIOException(java.io.UncheckedIOException) Test(org.junit.jupiter.api.Test) Beneficiary(gov.cms.bfd.model.rif.Beneficiary) LoadedBatch(gov.cms.bfd.model.rif.LoadedBatch) List(java.util.List) BeneficiaryHistory_(gov.cms.bfd.model.rif.BeneficiaryHistory_) Stream(java.util.stream.Stream) EntityManagerFactory(javax.persistence.EntityManagerFactory) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) CarrierClaim(gov.cms.bfd.model.rif.CarrierClaim) LocalDate(java.time.LocalDate) Slf4jReporter(com.codahale.metrics.Slf4jReporter) LoadedFile(gov.cms.bfd.model.rif.LoadedFile) Optional(java.util.Optional) RifParsingUtils(gov.cms.bfd.model.rif.parse.RifParsingUtils) 
BeneficiaryColumn(gov.cms.bfd.model.rif.BeneficiaryColumn) RifRecordEvent(gov.cms.bfd.model.rif.RifRecordEvent) CSVPrinter(org.apache.commons.csv.CSVPrinter) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) SkippedRifRecord(gov.cms.bfd.model.rif.SkippedRifRecord) LocalRifFile(gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile) Function(java.util.function.Function) PipelineTestUtils(gov.cms.bfd.pipeline.sharedutils.PipelineTestUtils) StreamSupport(java.util.stream.StreamSupport) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) StaticRifResourceGroup(gov.cms.bfd.model.rif.samples.StaticRifResourceGroup) Root(javax.persistence.criteria.Root) RifFileEvent(gov.cms.bfd.model.rif.RifFileEvent) Logger(org.slf4j.Logger) Files(java.nio.file.Files) Month(java.time.Month) FileWriter(java.io.FileWriter) IOException(java.io.IOException) EntityManager(javax.persistence.EntityManager) AfterEach(org.junit.jupiter.api.AfterEach) ChronoUnit(java.time.temporal.ChronoUnit) EntityTransaction(javax.persistence.EntityTransaction) RifFile(gov.cms.bfd.model.rif.RifFile) LocalRifFile(gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile) RifRecordEvent(gov.cms.bfd.model.rif.RifRecordEvent) List(java.util.List) CSVRecord(org.apache.commons.csv.CSVRecord) AssertionFailedError(org.opentest4j.AssertionFailedError) Test(org.junit.jupiter.api.Test)

Aggregations

RifFile (gov.cms.bfd.model.rif.RifFile)5 LocalRifFile (gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile)4 IOException (java.io.IOException)4 Path (java.nio.file.Path)4 List (java.util.List)4 AmazonClientException (com.amazonaws.AmazonClientException)3 AmazonS3 (com.amazonaws.services.s3.AmazonS3)3 Download (com.amazonaws.services.s3.transfer.Download)3 TransferManager (com.amazonaws.services.s3.transfer.TransferManager)3 BeneficiaryColumn (gov.cms.bfd.model.rif.BeneficiaryColumn)3 RifFileType (gov.cms.bfd.model.rif.RifFileType)3 RifParsingUtils (gov.cms.bfd.model.rif.parse.RifParsingUtils)3 TransferManagerBuilder (com.amazonaws.services.s3.transfer.TransferManagerBuilder)2 CarrierClaimColumn (gov.cms.bfd.model.rif.CarrierClaimColumn)2 DMEClaimColumn (gov.cms.bfd.model.rif.DMEClaimColumn)2 HHAClaimColumn (gov.cms.bfd.model.rif.HHAClaimColumn)2 HospiceClaimColumn (gov.cms.bfd.model.rif.HospiceClaimColumn)2 InpatientClaimColumn (gov.cms.bfd.model.rif.InpatientClaimColumn)2 OutpatientClaimColumn (gov.cms.bfd.model.rif.OutpatientClaimColumn)2 PartDEventColumn (gov.cms.bfd.model.rif.PartDEventColumn)2