Search in sources:

Example 1 with DataSetManifest

use of gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest in project beneficiary-fhir-data by CMSgov.

the class DataSetManifestTest method main.

/**
 * Small utility entry point that builds a sample {@link DataSetManifest} and marshals it to XML
 * on standard output via JAXB. Its output was used as the basis for the test resources used in
 * these tests.
 *
 * @param args (not used)
 * @throws JAXBException (programmer error)
 */
public static void main(String[] args) throws JAXBException {
    // Capture the timestamp first, then assemble the two sample entries.
    Instant timestamp = Instant.now();
    DataSetManifestEntry beneficiaryEntry = new DataSetManifestEntry("foo.xml", RifFileType.BENEFICIARY);
    DataSetManifestEntry pdeEntry = new DataSetManifestEntry("bar.xml", RifFileType.PDE);
    DataSetManifest sampleManifest = new DataSetManifest(timestamp, 0, beneficiaryEntry, pdeEntry);

    // Write the manifest as XML to standard output.
    Marshaller xmlWriter = JAXBContext.newInstance(DataSetManifest.class).createMarshaller();
    xmlWriter.marshal(sampleManifest, System.out);
}
Also used : Marshaller(javax.xml.bind.Marshaller) JAXBContext(javax.xml.bind.JAXBContext) DataSetManifestEntry(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry)

Example 2 with DataSetManifest

use of gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest in project beneficiary-fhir-data by CMSgov.

the class DataSetManifestTest method jaxbUnmarshallingForSampleB.

/**
 * Verifies that the {@code manifest-sample-b.xml} classpath resource can be unmarshalled into a
 * {@link DataSetManifest} with the expected timestamp year, sequence ID, and entries. The sample
 * XML document used here was produced by Scott Koerselman on 2016-12-19.
 *
 * @throws JAXBException (indicates test failure)
 */
@Test
public void jaxbUnmarshallingForSampleB() throws JAXBException {
    // Load the sample document from the classpath and unmarshal it.
    ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
    InputStream sampleXml = contextLoader.getResourceAsStream("manifest-sample-b.xml");
    Unmarshaller xmlReader = JAXBContext.newInstance(DataSetManifest.class).createUnmarshaller();
    DataSetManifest parsed = (DataSetManifest) xmlReader.unmarshal(sampleXml);

    // Check the manifest-level fields.
    assertNotNull(parsed);
    assertNotNull(parsed.getTimestamp());
    LocalDateTime localTimestamp = LocalDateTime.ofInstant(parsed.getTimestamp(), ZoneId.systemDefault());
    assertEquals(2016, localTimestamp.get(ChronoField.YEAR));
    assertEquals(1, parsed.getSequenceId());
    assertEquals(9, parsed.getEntries().size());
    assertEquals("bene.txt", parsed.getEntries().get(0).getName());

    // Every entry must be present and fully populated.
    int index = 0;
    while (index < parsed.getEntries().size()) {
        DataSetManifestEntry current = parsed.getEntries().get(index);
        assertNotNull(current, "Null entry: " + index);
        assertNotNull(current.getName(), "Null entry name: " + index);
        assertNotNull(current.getType(), "Null entry type: " + index);
        index++;
    }
    assertEquals(RifFileType.BENEFICIARY, parsed.getEntries().get(0).getType());
}
Also used : InputStream(java.io.InputStream) JAXBContext(javax.xml.bind.JAXBContext) Unmarshaller(javax.xml.bind.Unmarshaller) DataSetManifestEntry(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry) Test(org.junit.jupiter.api.Test)

Example 3 with DataSetManifest

use of gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest in project beneficiary-fhir-data by CMSgov.

the class DataSetSubsetter method downloadDataSet.

/**
 * Downloads the data set's manifest ({@code 1_manifest.xml}) and every file listed in it from
 * S3 into the specified local directory. Any file that already exists locally is not downloaded
 * again.
 *
 * @param options the {@link ExtractionOptions} to use
 * @param dataSetS3KeyPrefix the S3 key prefix (i.e. directory) of the data set to download
 * @param downloadDirectory the Path to the directory to download the RIF files locally to
 * @return the {@link LocalRifFile}s (one per manifest entry, in manifest order) that comprise
 *     the downloaded dummy data set
 * @throws RuntimeException if a download fails or the waiting thread is interrupted
 * @throws UncheckedJaxbException if the downloaded manifest cannot be unmarshalled
 */
private static List<RifFile> downloadDataSet(ExtractionOptions options, String dataSetS3KeyPrefix, Path downloadDirectory) {
    AmazonS3 s3Client = S3Utilities.createS3Client(options);
    TransferManager transferManager = TransferManagerBuilder.standard().withS3Client(s3Client).build();
    List<RifFile> rifFiles = new ArrayList<>();
    try {
        String dataSetPrefix = "data-random/" + dataSetS3KeyPrefix;
        String manifestSuffix = "1_manifest.xml";
        Path manifestDownloadPath = downloadDirectory.resolve(manifestSuffix);
        if (!Files.exists(manifestDownloadPath)) {
            String manifestKey = String.format("%s/%s", dataSetPrefix, manifestSuffix);
            Download manifestDownload = transferManager.download(options.getS3BucketName(), manifestKey, manifestDownloadPath.toFile());
            awaitDownloadCompletion(manifestDownload);
        }
        LOGGER.info("Manifest downloaded.");

        // Parse the local copy of the manifest to learn which files make up the data set.
        DataSetManifest dummyDataSetManifest;
        try {
            JAXBContext jaxbContext = JAXBContext.newInstance(DataSetManifest.class);
            Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
            dummyDataSetManifest = (DataSetManifest) jaxbUnmarshaller.unmarshal(manifestDownloadPath.toFile());
        } catch (JAXBException e) {
            throw new UncheckedJaxbException(e);
        }

        for (DataSetManifestEntry manifestEntry : dummyDataSetManifest.getEntries()) {
            String dataSetFileKey = String.format("%s/%s", dataSetPrefix, manifestEntry.getName());
            Path dataSetFileDownloadPath = downloadDirectory.resolve(manifestEntry.getName());
            if (!Files.exists(dataSetFileDownloadPath)) {
                LOGGER.info("Downloading RIF file: '{}'...", manifestEntry.getName());
                Download dataSetFileDownload = transferManager.download(options.getS3BucketName(), dataSetFileKey, dataSetFileDownloadPath.toFile());
                awaitDownloadCompletion(dataSetFileDownload);
            }
            RifFile dataSetFile = new LocalRifFile(dataSetFileDownloadPath, manifestEntry.getType());
            rifFiles.add(dataSetFile);
        }
    } finally {
        // Always release the transfer manager, including when a download or the parse fails.
        transferManager.shutdownNow();
    }
    LOGGER.info("Original RIF files ready.");
    return rifFiles;
}

/**
 * Blocks until the specified download has finished, converting failures to unchecked exceptions.
 *
 * @param download the in-progress {@link Download} to wait for
 * @throws RuntimeException if the download fails or the waiting thread is interrupted
 */
private static void awaitDownloadCompletion(Download download) {
    try {
        download.waitForCompletion();
    } catch (InterruptedException e) {
        // Restore the interrupt flag so that code further up the stack can still observe it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    } catch (AmazonClientException e) {
        throw new RuntimeException(e);
    }
}
Also used : Path(java.nio.file.Path) AmazonS3(com.amazonaws.services.s3.AmazonS3) TransferManager(com.amazonaws.services.s3.transfer.TransferManager) S3RifFile(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3RifFile) RifFile(gov.cms.bfd.model.rif.RifFile) LocalRifFile(gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile) DataSetManifest(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest) LocalRifFile(gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile) AmazonClientException(com.amazonaws.AmazonClientException) JAXBException(javax.xml.bind.JAXBException) ArrayList(java.util.ArrayList) JAXBContext(javax.xml.bind.JAXBContext) UncheckedJaxbException(gov.cms.bfd.sharedutils.exceptions.UncheckedJaxbException) Unmarshaller(javax.xml.bind.Unmarshaller) Download(com.amazonaws.services.s3.transfer.Download) DataSetManifestEntry(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry)

Example 4 with DataSetManifest

use of gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest in project beneficiary-fhir-data by CMSgov.

the class SyntheticDataUploader method uploadSyntheticData.

/**
 * Builds a {@link DataSetManifest} with one entry per {@link SyntheticDataFile}, uploads that
 * manifest to S3, and then uploads each of the files listed in it. Every uploaded object is
 * given the {@code PublicRead} canned ACL.
 *
 * @param s3Client the {@link AmazonS3} client to use
 * @param s3KeyPrefix the S3 key prefix to upload all objects under/into
 * @param syntheticDataPathGrabber the {@link Function} that returns the {@link Path} to upload
 *     from, for a given {@link SyntheticDataFile}
 * @throws MalformedURLException Any {@link MalformedURLException}s encountered will be bubbled
 *     up.
 * @throws IllegalStateException if no manifest entry matches a file's name (not expected, as the
 *     manifest is built from the same set of files)
 */
private static void uploadSyntheticData(AmazonS3 s3Client, String s3KeyPrefix, Function<SyntheticDataFile, Path> syntheticDataPathGrabber) throws MalformedURLException {
    Bucket bucket = new Bucket(TestDataSetLocation.S3_BUCKET_TEST_DATA);
    // Build a DataSetManifest for the data to be uploaded.
    List<DataSetManifestEntry> manifestEntries = new LinkedList<>();
    for (SyntheticDataFile syntheticDataFile : SyntheticDataFile.values()) {
        String fileName = syntheticDataPathGrabber.apply(syntheticDataFile).getFileName().toString();
        manifestEntries.add(new DataSetManifestEntry(fileName, syntheticDataFile.getRifFile().getFileType()));
    }
    DataSetManifest manifest = new DataSetManifest(Instant.now(), 0, manifestEntries);
    // Upload the manifest and every file in it.
    PutObjectRequest manifestRequest = DataSetTestUtilities.createPutRequest(bucket, s3KeyPrefix, manifest);
    manifestRequest.setCannedAcl(CannedAccessControlList.PublicRead);
    s3Client.putObject(manifestRequest);
    LOGGER.info("Uploaded: manifest");
    for (SyntheticDataFile syntheticDataFile : SyntheticDataFile.values()) {
        // Resolve the local path once per file, rather than once per manifest entry inspected.
        Path localPath = syntheticDataPathGrabber.apply(syntheticDataFile);
        String fileName = localPath.getFileName().toString();
        DataSetManifestEntry manifestEntry = manifest.getEntries().stream()
                .filter(e -> e.getName().equals(fileName))
                .findFirst()
                .orElseThrow(() -> new IllegalStateException("No manifest entry found for file: " + fileName));
        PutObjectRequest fileRequest = DataSetTestUtilities.createPutRequest(bucket, s3KeyPrefix, manifest, manifestEntry, localPath.toUri().toURL());
        fileRequest.setCannedAcl(CannedAccessControlList.PublicRead);
        s3Client.putObject(fileRequest);
        LOGGER.info("Uploaded: {}", syntheticDataFile.name());
    }
}
Also used : SyntheticDataFile(gov.cms.bfd.pipeline.ccw.rif.extract.synthetic.SyntheticDataFixer.SyntheticDataFile) DataSetManifest(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest) Bucket(com.amazonaws.services.s3.model.Bucket) DataSetManifestEntry(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry) LinkedList(java.util.LinkedList) PutObjectRequest(com.amazonaws.services.s3.model.PutObjectRequest)

Example 5 with DataSetManifest

use of gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest in project beneficiary-fhir-data by CMSgov.

the class SyntheticDataUploader2 method uploadSyntheticData.

/**
 * Builds a {@link DataSetManifest} with one entry per {@link SyntheticDataFile}, uploads that
 * manifest to S3, and then uploads each of the files listed in it. Every uploaded object is
 * given the {@code PublicRead} canned ACL.
 *
 * @param s3Client the {@link AmazonS3} client to use
 * @param s3KeyPrefix the S3 key prefix to upload all objects under/into
 * @param syntheticDataPathGrabber the {@link Function} that returns the {@link Path} to upload
 *     from, for a given {@link SyntheticDataFile}
 * @throws MalformedURLException Any {@link MalformedURLException}s encountered will be bubbled
 *     up.
 * @throws IllegalStateException if no manifest entry matches a file's name (not expected, as the
 *     manifest is built from the same set of files)
 */
private static void uploadSyntheticData(AmazonS3 s3Client, String s3KeyPrefix, Function<SyntheticDataFile, Path> syntheticDataPathGrabber) throws MalformedURLException {
    Bucket bucket = new Bucket(TestDataSetLocation.S3_BUCKET_TEST_DATA);
    // Build a DataSetManifest for the data to be uploaded.
    List<DataSetManifestEntry> manifestEntries = new LinkedList<>();
    for (SyntheticDataFile syntheticDataFile : SyntheticDataFile.values()) {
        String fileName = syntheticDataPathGrabber.apply(syntheticDataFile).getFileName().toString();
        manifestEntries.add(new DataSetManifestEntry(fileName, syntheticDataFile.getRifFile().getFileType()));
    }
    DataSetManifest manifest = new DataSetManifest(Instant.now(), 0, manifestEntries);
    // Upload the manifest and every file in it.
    PutObjectRequest manifestRequest = DataSetTestUtilities.createPutRequest(bucket, s3KeyPrefix, manifest);
    manifestRequest.setCannedAcl(CannedAccessControlList.PublicRead);
    s3Client.putObject(manifestRequest);
    LOGGER.info("Uploaded: manifest");
    for (SyntheticDataFile syntheticDataFile : SyntheticDataFile.values()) {
        // Resolve the local path once per file, rather than once per manifest entry inspected.
        Path localPath = syntheticDataPathGrabber.apply(syntheticDataFile);
        String fileName = localPath.getFileName().toString();
        DataSetManifestEntry manifestEntry = manifest.getEntries().stream()
                .filter(e -> e.getName().equals(fileName))
                .findFirst()
                .orElseThrow(() -> new IllegalStateException("No manifest entry found for file: " + fileName));
        PutObjectRequest fileRequest = DataSetTestUtilities.createPutRequest(bucket, s3KeyPrefix, manifest, manifestEntry, localPath.toUri().toURL());
        fileRequest.setCannedAcl(CannedAccessControlList.PublicRead);
        s3Client.putObject(fileRequest);
        LOGGER.info("Uploaded: {}", syntheticDataFile.name());
    }
}
Also used : SyntheticDataFile(gov.cms.bfd.pipeline.ccw.rif.extract.synthetic.SyntheticDataFixer2.SyntheticDataFile) DataSetManifest(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest) Bucket(com.amazonaws.services.s3.model.Bucket) DataSetManifestEntry(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry) LinkedList(java.util.LinkedList) PutObjectRequest(com.amazonaws.services.s3.model.PutObjectRequest)

Aggregations

DataSetManifestEntry (gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry)13 DataSetManifest (gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest)11 Bucket (com.amazonaws.services.s3.model.Bucket)8 AmazonS3 (com.amazonaws.services.s3.AmazonS3)6 ExtractionOptions (gov.cms.bfd.pipeline.ccw.rif.extract.ExtractionOptions)6 Test (org.junit.jupiter.api.Test)6 S3TaskManager (gov.cms.bfd.pipeline.ccw.rif.extract.s3.task.S3TaskManager)5 Path (java.nio.file.Path)4 PutObjectRequest (com.amazonaws.services.s3.model.PutObjectRequest)3 DataSetManifestId (gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestId)3 MockDataSetMonitorListener (gov.cms.bfd.pipeline.ccw.rif.extract.s3.MockDataSetMonitorListener)3 S3RifFile (gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3RifFile)3 IOException (java.io.IOException)3 UncheckedIOException (java.io.UncheckedIOException)3 ArrayList (java.util.ArrayList)3 LinkedList (java.util.LinkedList)3 JAXBContext (javax.xml.bind.JAXBContext)3 AmazonClientException (com.amazonaws.AmazonClientException)2 ListObjectsV2Request (com.amazonaws.services.s3.model.ListObjectsV2Request)2 ListObjectsV2Result (com.amazonaws.services.s3.model.ListObjectsV2Result)2