use of gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry in project beneficiary-fhir-data by CMSgov.
the class DataSetManifestTest method main.
/**
 * Small utility entry point that builds a sample {@link DataSetManifest} containing two entries
 * and marshalls it to XML on standard output via JAXB. Its output served as the basis for the
 * test resources used in these tests.
 *
 * @param args (not used)
 * @throws JAXBException (programmer error)
 */
public static void main(String[] args) throws JAXBException {
  // Capture the timestamp first, matching the order in which the manifest's arguments are built.
  Instant timestamp = Instant.now();
  DataSetManifestEntry beneficiaryEntry = new DataSetManifestEntry("foo.xml", RifFileType.BENEFICIARY);
  DataSetManifestEntry pdeEntry = new DataSetManifestEntry("bar.xml", RifFileType.PDE);
  DataSetManifest sampleManifest = new DataSetManifest(timestamp, 0, beneficiaryEntry, pdeEntry);

  // Marshall the sample manifest straight to the console.
  Marshaller marshaller = JAXBContext.newInstance(DataSetManifest.class).createMarshaller();
  marshaller.marshal(sampleManifest, System.out);
}
use of gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry in project beneficiary-fhir-data by CMSgov.
the class DataSetManifestTest method jaxbUnmarshallingForSampleB.
/**
 * Unmarshalls the <code>manifest-sample-b.xml</code> classpath resource into a {@link
 * DataSetManifest} and checks its timestamp, sequence ID, and entries. The sample XML document
 * used here was produced by Scott Koerselman on 2016-12-19.
 *
 * @throws JAXBException (indicates test failure)
 */
@Test
public void jaxbUnmarshallingForSampleB() throws JAXBException {
  // Load the sample document from the classpath and unmarshall it.
  ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
  InputStream sampleXml = contextClassLoader.getResourceAsStream("manifest-sample-b.xml");
  Unmarshaller unmarshaller = JAXBContext.newInstance(DataSetManifest.class).createUnmarshaller();
  DataSetManifest parsed = (DataSetManifest) unmarshaller.unmarshal(sampleXml);

  // Top-level manifest properties.
  assertNotNull(parsed);
  assertNotNull(parsed.getTimestamp());
  LocalDateTime localTimestamp = LocalDateTime.ofInstant(parsed.getTimestamp(), ZoneId.systemDefault());
  assertEquals(2016, localTimestamp.get(ChronoField.YEAR));
  assertEquals(1, parsed.getSequenceId());
  assertEquals(9, parsed.getEntries().size());
  assertEquals("bene.txt", parsed.getEntries().get(0).getName());

  // Every entry must be fully populated; the position is included in any failure message.
  int position = 0;
  for (DataSetManifestEntry current : parsed.getEntries()) {
    assertNotNull(current, "Null entry: " + position);
    assertNotNull(current.getName(), "Null entry name: " + position);
    assertNotNull(current.getType(), "Null entry type: " + position);
    position++;
  }

  assertEquals(RifFileType.BENEFICIARY, parsed.getEntries().get(0).getType());
}
use of gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry in project beneficiary-fhir-data by CMSgov.
the class DataSetSubsetter method downloadDataSet.
/**
 * Downloads the manifest (<code>1_manifest.xml</code>) and every file listed in it from the
 * <code>data-random/&lt;dataSetS3KeyPrefix&gt;</code> location of the configured S3 bucket into
 * the specified local directory. Files that already exist locally are not downloaded again.
 *
 * @param options the {@link ExtractionOptions} to use
 * @param dataSetS3KeyPrefix the S3 key prefix (i.e. directory) of the data set to download
 * @param downloadDirectory the Path to the directory to download the RIF files locally to
 * @return one {@link LocalRifFile} per manifest entry, each pointing at the local copy of that
 *     file, in manifest order
 * @throws RuntimeException if a download fails or the waiting thread is interrupted
 * @throws UncheckedJaxbException if the downloaded manifest cannot be unmarshalled
 */
private static List<RifFile> downloadDataSet(ExtractionOptions options, String dataSetS3KeyPrefix, Path downloadDirectory) {
  AmazonS3 s3Client = S3Utilities.createS3Client(options);
  TransferManager transferManager = TransferManagerBuilder.standard().withS3Client(s3Client).build();
  List<RifFile> rifFiles = new ArrayList<>();
  try {
    String dataSetPrefix = "data-random/" + dataSetS3KeyPrefix;
    String manifestSuffix = "1_manifest.xml";
    Path manifestDownloadPath = downloadDirectory.resolve(manifestSuffix);
    if (!Files.exists(manifestDownloadPath)) {
      String manifestKey = String.format("%s/%s", dataSetPrefix, manifestSuffix);
      Download manifestDownload = transferManager.download(options.getS3BucketName(), manifestKey, manifestDownloadPath.toFile());
      waitForDownload(manifestDownload);
    }
    LOGGER.info("Manifest downloaded.");

    DataSetManifest dummyDataSetManifest;
    try {
      JAXBContext jaxbContext = JAXBContext.newInstance(DataSetManifest.class);
      Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
      dummyDataSetManifest = (DataSetManifest) jaxbUnmarshaller.unmarshal(manifestDownloadPath.toFile());
    } catch (JAXBException e) {
      throw new UncheckedJaxbException(e);
    }

    for (DataSetManifestEntry manifestEntry : dummyDataSetManifest.getEntries()) {
      String dataSetFileKey = String.format("%s/%s", dataSetPrefix, manifestEntry.getName());
      Path dataSetFileDownloadPath = downloadDirectory.resolve(manifestEntry.getName());
      if (!Files.exists(dataSetFileDownloadPath)) {
        LOGGER.info("Downloading RIF file: '{}'...", manifestEntry.getName());
        Download dataSetFileDownload = transferManager.download(options.getS3BucketName(), dataSetFileKey, dataSetFileDownloadPath.toFile());
        waitForDownload(dataSetFileDownload);
      }
      rifFiles.add(new LocalRifFile(dataSetFileDownloadPath, manifestEntry.getType()));
    }
  } finally {
    // Always release the transfer manager, even when a download or the manifest parse fails;
    // previously it was only shut down on the success path.
    transferManager.shutdownNow();
  }
  LOGGER.info("Original RIF files ready.");
  return rifFiles;
}

/**
 * Blocks until the specified download has finished, converting failures to unchecked exceptions.
 *
 * @param download the in-progress {@link Download} to wait for
 * @throws RuntimeException if the download fails or the waiting thread is interrupted
 */
private static void waitForDownload(Download download) {
  try {
    download.waitForCompletion();
  } catch (InterruptedException e) {
    // Restore the interrupt flag so code further up the stack can still observe the interrupt.
    Thread.currentThread().interrupt();
    throw new RuntimeException(e);
  } catch (AmazonClientException e) {
    throw new RuntimeException(e);
  }
}
use of gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry in project beneficiary-fhir-data by CMSgov.
the class SyntheticDataUploader method uploadSyntheticData.
/**
 * Builds a {@link DataSetManifest} with one entry per {@link SyntheticDataFile}, uploads that
 * manifest, and then uploads each file listed in it. Every object is uploaded with a public-read
 * canned ACL.
 *
 * @param s3Client the {@link AmazonS3} client to use
 * @param s3KeyPrefix the S3 key prefix to upload all objects under/into
 * @param syntheticDataPathGrabber the {@link Function} that returns the {@link Path} to upload
 *     from, for a given {@link SyntheticDataFile}
 * @throws MalformedURLException Any {@link MalformedURLException}s encountered will be bubbled
 *     up.
 * @throws java.util.NoSuchElementException if the manifest has no entry matching one of the
 *     files' names
 */
private static void uploadSyntheticData(AmazonS3 s3Client, String s3KeyPrefix, Function<SyntheticDataFile, Path> syntheticDataPathGrabber) throws MalformedURLException {
  Bucket bucket = new Bucket(TestDataSetLocation.S3_BUCKET_TEST_DATA);

  // Build a DataSetManifest for the data to be uploaded.
  List<DataSetManifestEntry> manifestEntries = new LinkedList<>();
  for (SyntheticDataFile syntheticDataFile : SyntheticDataFile.values()) {
    Path filePath = syntheticDataPathGrabber.apply(syntheticDataFile);
    manifestEntries.add(new DataSetManifestEntry(filePath.getFileName().toString(), syntheticDataFile.getRifFile().getFileType()));
  }
  DataSetManifest manifest = new DataSetManifest(Instant.now(), 0, manifestEntries);

  // Upload the manifest and every file in it.
  PutObjectRequest manifestRequest = DataSetTestUtilities.createPutRequest(bucket, s3KeyPrefix, manifest);
  manifestRequest.setCannedAcl(CannedAccessControlList.PublicRead);
  s3Client.putObject(manifestRequest);
  LOGGER.info("Uploaded: manifest");

  for (SyntheticDataFile syntheticDataFile : SyntheticDataFile.values()) {
    // Resolve the path once per file, rather than once per manifest entry inside the filter.
    Path filePath = syntheticDataPathGrabber.apply(syntheticDataFile);
    String fileName = filePath.getFileName().toString();

    // Look the entry up by name rather than by position, as the manifest owns its entry order.
    DataSetManifestEntry manifestEntry =
        manifest.getEntries().stream()
            .filter(e -> fileName.equals(e.getName()))
            .findFirst()
            .orElseThrow(() -> new java.util.NoSuchElementException("No manifest entry found for file: " + fileName));

    PutObjectRequest fileRequest = DataSetTestUtilities.createPutRequest(bucket, s3KeyPrefix, manifest, manifestEntry, filePath.toUri().toURL());
    fileRequest.setCannedAcl(CannedAccessControlList.PublicRead);
    s3Client.putObject(fileRequest);
    LOGGER.info("Uploaded: {}", syntheticDataFile.name());
  }
}
use of gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry in project beneficiary-fhir-data by CMSgov.
the class SyntheticDataUploader2 method uploadSyntheticData.
/**
 * Builds a {@link DataSetManifest} with one entry per {@link SyntheticDataFile}, uploads that
 * manifest, and then uploads each file listed in it. Every object is uploaded with a public-read
 * canned ACL.
 *
 * @param s3Client the {@link AmazonS3} client to use
 * @param s3KeyPrefix the S3 key prefix to upload all objects under/into
 * @param syntheticDataPathGrabber the {@link Function} that returns the {@link Path} to upload
 *     from, for a given {@link SyntheticDataFile}
 * @throws MalformedURLException Any {@link MalformedURLException}s encountered will be bubbled
 *     up.
 * @throws java.util.NoSuchElementException if the manifest has no entry matching one of the
 *     files' names
 */
private static void uploadSyntheticData(AmazonS3 s3Client, String s3KeyPrefix, Function<SyntheticDataFile, Path> syntheticDataPathGrabber) throws MalformedURLException {
  Bucket bucket = new Bucket(TestDataSetLocation.S3_BUCKET_TEST_DATA);

  // Build a DataSetManifest for the data to be uploaded.
  List<DataSetManifestEntry> manifestEntries = new LinkedList<>();
  for (SyntheticDataFile syntheticDataFile : SyntheticDataFile.values()) {
    Path filePath = syntheticDataPathGrabber.apply(syntheticDataFile);
    manifestEntries.add(new DataSetManifestEntry(filePath.getFileName().toString(), syntheticDataFile.getRifFile().getFileType()));
  }
  DataSetManifest manifest = new DataSetManifest(Instant.now(), 0, manifestEntries);

  // Upload the manifest and every file in it.
  PutObjectRequest manifestRequest = DataSetTestUtilities.createPutRequest(bucket, s3KeyPrefix, manifest);
  manifestRequest.setCannedAcl(CannedAccessControlList.PublicRead);
  s3Client.putObject(manifestRequest);
  LOGGER.info("Uploaded: manifest");

  for (SyntheticDataFile syntheticDataFile : SyntheticDataFile.values()) {
    // Resolve the path once per file, rather than once per manifest entry inside the filter.
    Path filePath = syntheticDataPathGrabber.apply(syntheticDataFile);
    String fileName = filePath.getFileName().toString();

    // Look the entry up by name rather than by position, as the manifest owns its entry order.
    DataSetManifestEntry manifestEntry =
        manifest.getEntries().stream()
            .filter(e -> fileName.equals(e.getName()))
            .findFirst()
            .orElseThrow(() -> new java.util.NoSuchElementException("No manifest entry found for file: " + fileName));

    PutObjectRequest fileRequest = DataSetTestUtilities.createPutRequest(bucket, s3KeyPrefix, manifest, manifestEntry, filePath.toUri().toURL());
    fileRequest.setCannedAcl(CannedAccessControlList.PublicRead);
    s3Client.putObject(fileRequest);
    LOGGER.info("Uploaded: {}", syntheticDataFile.name());
  }
}
Aggregations