Use of gov.cms.bfd.model.rif.samples.TestDataSetLocation in the project beneficiary-fhir-data by CMSgov.
From the class DataSetSubsetter, the method main:
/**
 * The application entry point that can be used to run the {@link DataSetSubsetter}.
 *
 * <p>Each pass of the loop looks up the {@code DUMMY_DATA_<n>_BENES} constants of {@link
 * TestDataSetLocation} for a source beneficiary count and for one tenth of that count, downloads
 * the source data set into {@code ./test-data-random/<source data set ID>}, and writes the subset
 * to {@code ./test-data-random/<target data set ID>}. The source count starts at 1,000,000 and is
 * divided by ten after every pass; the last pass uses a source count of 10.
 *
 * @param args (not used)
 * @throws Exception Any exceptions thrown will be bubbled up, terminating the app.
 */
public static void main(String[] args) throws Exception {
  // The local working directory is identical for every pass, so it is prepared only once.
  Path outputDirectory = Paths.get(".", "test-data-random");
  Files.createDirectories(outputDirectory);

  /*
   * From the original source data set of 1M beneficiaries and their
   * claims, create subsets going all the way down by powers of ten. This
   * gives test authors lots of good options for how much data to test
   * against. Note that on Karl's `jordan-u` system, this took 5.5h to
   * run.
   */
  for (int beneCount = 1000000; beneCount >= 10; beneCount /= 10) {
    // Grab the source and target constants.
    int targetBeneCount = beneCount / 10;
    TestDataSetLocation sourceDataSet = findDummyDataSet(beneCount);
    TestDataSetLocation targetDataSet = findDummyDataSet(targetBeneCount);

    // Figure out what directory to store the source in locally.
    String sourceDataSetId = lastKeySegment(sourceDataSet.getS3KeyPrefix());
    Path sourceDataSetDirectory = outputDirectory.resolve(sourceDataSetId);

    // Download the source data set and build the target from it.
    ExtractionOptions options = new ExtractionOptions(sourceDataSet.getS3BucketName());
    String targetDataSetId = lastKeySegment(targetDataSet.getS3KeyPrefix());
    Path targetDataSetDirectory = outputDirectory.resolve(targetDataSetId);

    // Whatever follows the "<digits>-beneficiaries-" prefix of the target ID must be text that
    // Instant.parse accepts; it becomes the timestamp handed to the writer.
    Instant targetDataSetTimestamp =
        Instant.parse(targetDataSetId.replaceFirst("\\d+-beneficiaries-", ""));

    try (IDataSetWriter output =
        new LocalDataSetWriter(targetDataSetDirectory, targetDataSetTimestamp)) {
      Files.createDirectories(sourceDataSetDirectory);
      List<RifFile> rifFiles = downloadDataSet(options, sourceDataSetId, sourceDataSetDirectory);
      DataSetSubsetter.createSubset(output, targetBeneCount, rifFiles);
    }
  }
}

/**
 * Looks up the {@link TestDataSetLocation} constant named {@code DUMMY_DATA_<beneCount>_BENES}.
 *
 * @param beneCount the beneficiary count that appears in the constant's name
 * @return the matching {@link TestDataSetLocation} constant
 * @throws IllegalArgumentException if no constant with that exact name exists
 */
private static TestDataSetLocation findDummyDataSet(int beneCount) {
  // valueOf does an exact name match and its exception names the missing constant.
  return TestDataSetLocation.valueOf("DUMMY_DATA_" + beneCount + "_BENES");
}

/**
 * Returns the last {@code /}-separated segment of an S3 key prefix, which {@link #main} uses as
 * the data set's ID.
 *
 * @param s3KeyPrefix the key prefix to take the final segment from
 * @return the text after the last {@code /} separator, ignoring trailing separators
 * @throws IllegalStateException if the prefix consists solely of {@code /} separators
 */
private static String lastKeySegment(String s3KeyPrefix) {
  // String.split drops trailing empty strings, so "a/b/" yields ["a", "b"] and "/" yields [].
  String[] segments = s3KeyPrefix.split("/");
  if (segments.length == 0) {
    throw new IllegalStateException(
        "S3 key prefix has no data set ID segment: '" + s3KeyPrefix + "'");
  }
  return segments[segments.length - 1];
}
Aggregations