use of com.amazonaws.services.s3.model.Filter in project beneficiary-fhir-data by CMSgov.
In class DataSetSubsetter, the method createSubset.
/**
 * Creates a subset of the specified input {@link RifFile}s, writing out the results via the
 * {@link CSVPrinter}s provided by the specified {@link IDataSetWriter}.
 *
 * @param output the {@link IDataSetWriter} to get the needed {@link CSVPrinter}s from
 * @param beneficiaryCount the target beneficiary count of the copy/subset to create
 * @param rifFiles the input {@link RifFile}s to be subsetted
 * @throws IOException Any {@link IOException}s encountered will be bubbled up.
 * @throws IllegalArgumentException if {@code beneficiaryCount} exceeds the number of
 *     beneficiary IDs found in the input files
 * @throws IllegalStateException if a duplicate beneficiary ID is found in the input files
 */
public static void createSubset(IDataSetWriter output, int beneficiaryCount, List<RifFile> rifFiles) throws IOException {
	LOGGER.info("Scanning beneficiary IDs...");
	List<RifFile> beneficiaryFiles = rifFiles.stream().filter(f -> f.getFileType() == RifFileType.BENEFICIARY).collect(Collectors.toList());
	/*
	 * Track the IDs in both a List (needed below for shuffling) and a Set (for O(1)
	 * duplicate detection; checking List.contains(...) on every record made this scan
	 * O(n^2) over all beneficiaries).
	 */
	List<String> beneficiaryIds = new ArrayList<>();
	Set<String> seenBeneficiaryIds = new HashSet<>();
	for (RifFile beneficiaryFile : beneficiaryFiles) {
		// try-with-resources ensures the parser is closed even if a record throws.
		try (CSVParser parser = RifParsingUtils.createCsvParser(beneficiaryFile)) {
			parser.forEach(r -> {
				String beneficiaryId = r.get(BeneficiaryColumn.BENE_ID);
				// Duplicate beneficiary IDs indicate corrupt input: fail fast, with context.
				if (!seenBeneficiaryIds.add(beneficiaryId))
					throw new IllegalStateException("Duplicate beneficiary ID: " + beneficiaryId);
				beneficiaryIds.add(beneficiaryId);
			});
		}
	}
	LOGGER.info("Scanned beneficiary IDs.");
	// Can't select more beneficiaries than were found; fail with a clear message rather
	// than an IndexOutOfBoundsException in the selection loop below.
	if (beneficiaryCount > beneficiaryIds.size())
		throw new IllegalArgumentException("Requested " + beneficiaryCount + " beneficiaries, but only " + beneficiaryIds.size() + " are available.");
	Set<String> selectedBeneficiaryIds = new HashSet<>(beneficiaryCount);
	// Shuffle, then take the first beneficiaryCount entries: a uniform random sample.
	Collections.shuffle(beneficiaryIds);
	for (int i = 0; i < beneficiaryCount; i++) selectedBeneficiaryIds.add(beneficiaryIds.get(i));
	LOGGER.info("Selected '{}' random beneficiary IDs.", beneficiaryCount);
	// Map each file type to the column that carries its beneficiary ID, so the filtering
	// loop below can be generic over file types.
	Map<RifFileType, Enum<?>> beneficiaryColumnByFileType = new HashMap<>();
	beneficiaryColumnByFileType.put(RifFileType.BENEFICIARY, BeneficiaryColumn.BENE_ID);
	beneficiaryColumnByFileType.put(RifFileType.CARRIER, CarrierClaimColumn.BENE_ID);
	beneficiaryColumnByFileType.put(RifFileType.DME, DMEClaimColumn.BENE_ID);
	beneficiaryColumnByFileType.put(RifFileType.HHA, HHAClaimColumn.BENE_ID);
	beneficiaryColumnByFileType.put(RifFileType.HOSPICE, HospiceClaimColumn.BENE_ID);
	beneficiaryColumnByFileType.put(RifFileType.INPATIENT, InpatientClaimColumn.BENE_ID);
	beneficiaryColumnByFileType.put(RifFileType.OUTPATIENT, OutpatientClaimColumn.BENE_ID);
	beneficiaryColumnByFileType.put(RifFileType.PDE, PartDEventColumn.BENE_ID);
	beneficiaryColumnByFileType.put(RifFileType.SNF, SNFClaimColumn.BENE_ID);
	for (RifFile rifFile : rifFiles) {
		LOGGER.info("Subsetting RIF file: '{}'...", rifFile.getDisplayName());
		CSVPrinter rifFilePrinter = output.getPrinter(rifFile.getFileType());
		// Close each input parser when done with it (the original leaked these).
		try (CSVParser parser = RifParsingUtils.createCsvParser(rifFile)) {
			/*
			 * When we created the CSVPrinter, we told it to skip the header.
			 * That ensures that we don't write out a header until we've started
			 * reading the file and know what it is. Here, we print a "fake"
			 * first record with the header, as read from the input file.
			 * Previously, we'd been having the CSVPrinter create a header based
			 * on our RIF column enums, but that leads to us propagating errors
			 * in those enums to the sample files. It's better to let the files
			 * tell us what their headers are.
			 */
			rifFilePrinter.printRecord(parser.getHeaderMap().entrySet().stream().sorted(Map.Entry.comparingByValue()).map(e -> e.getKey()).toArray());
			parser.forEach(r -> {
				String beneficiaryId = r.get(beneficiaryColumnByFileType.get(rifFile.getFileType()));
				// Only copy records belonging to the sampled beneficiaries.
				if (selectedBeneficiaryIds.contains(beneficiaryId))
					try {
						rifFilePrinter.printRecord(r);
					} catch (IOException e) {
						// forEach's Consumer can't throw checked exceptions; rewrap.
						throw new UncheckedIOException(e);
					}
			});
		}
	}
	LOGGER.info("Subsetted all RIF files.");
}
use of com.amazonaws.services.s3.model.Filter in project beneficiary-fhir-data by CMSgov.
In class SyntheticDataUploader, the method uploadSyntheticData.
/**
 * Uploads the specified {@link SyntheticDataFile}s, along with a {@link DataSetManifest}
 * describing them, to S3, marking every uploaded object as publicly readable.
 *
 * @param s3Client the {@link AmazonS3} client to use
 * @param s3KeyPrefix the S3 key prefix to upload all objects under/into
 * @param syntheticDataPathGrabber the {@link Function} that returns the {@link Path} to upload
 *     from, for a given {@link SyntheticDataFile}
 * @throws MalformedURLException Any {@link MalformedURLException}s encountered will be bubbled
 *     up.
 */
private static void uploadSyntheticData(AmazonS3 s3Client, String s3KeyPrefix, Function<SyntheticDataFile, Path> syntheticDataPathGrabber) throws MalformedURLException {
	Bucket bucket = new Bucket(TestDataSetLocation.S3_BUCKET_TEST_DATA);
	// Build a DataSetManifest for the data to be uploaded.
	List<DataSetManifestEntry> manifestEntries = new LinkedList<>();
	for (SyntheticDataFile syntheticDataFile : SyntheticDataFile.values()) {
		// Resolve the path once per file, instead of re-applying the grabber repeatedly.
		String fileName = syntheticDataPathGrabber.apply(syntheticDataFile).getFileName().toString();
		manifestEntries.add(new DataSetManifestEntry(fileName, syntheticDataFile.getRifFile().getFileType()));
	}
	DataSetManifest manifest = new DataSetManifest(Instant.now(), 0, manifestEntries);
	// Upload the manifest and every file in it.
	PutObjectRequest manifestRequest = DataSetTestUtilities.createPutRequest(bucket, s3KeyPrefix, manifest);
	manifestRequest.setCannedAcl(CannedAccessControlList.PublicRead);
	s3Client.putObject(manifestRequest);
	LOGGER.info("Uploaded: manifest");
	for (SyntheticDataFile syntheticDataFile : SyntheticDataFile.values()) {
		Path filePath = syntheticDataPathGrabber.apply(syntheticDataFile);
		String fileName = filePath.getFileName().toString();
		// orElseThrow with context, rather than a bare NoSuchElementException from
		// Optional.get(), if the entry built above is somehow missing.
		DataSetManifestEntry manifestEntry = manifest.getEntries().stream()
				.filter(e -> e.getName().equals(fileName))
				.findFirst()
				.orElseThrow(() -> new IllegalStateException("No manifest entry found for: " + fileName));
		PutObjectRequest fileRequest = DataSetTestUtilities.createPutRequest(bucket, s3KeyPrefix, manifest, manifestEntry, filePath.toUri().toURL());
		fileRequest.setCannedAcl(CannedAccessControlList.PublicRead);
		s3Client.putObject(fileRequest);
		LOGGER.info("Uploaded: {}", syntheticDataFile.name());
	}
}
use of com.amazonaws.services.s3.model.Filter in project beneficiary-fhir-data by CMSgov.
In class SyntheticDataUploader2, the method uploadSyntheticData.
/**
 * Uploads each {@link SyntheticDataFile} and a {@link DataSetManifest} that describes them to
 * S3, with every uploaded object granted a public-read ACL.
 *
 * @param s3Client the {@link AmazonS3} client to use
 * @param s3KeyPrefix the S3 key prefix to upload all objects under/into
 * @param syntheticDataPathGrabber the {@link Function} that returns the {@link Path} to upload
 *     from, for a given {@link SyntheticDataFile}
 * @throws MalformedURLException Any {@link MalformedURLException}s encountered will be bubbled
 *     up.
 */
private static void uploadSyntheticData(AmazonS3 s3Client, String s3KeyPrefix, Function<SyntheticDataFile, Path> syntheticDataPathGrabber) throws MalformedURLException {
	Bucket bucket = new Bucket(TestDataSetLocation.S3_BUCKET_TEST_DATA);
	// Build a DataSetManifest for the data to be uploaded.
	List<DataSetManifestEntry> manifestEntries = new LinkedList<>();
	for (SyntheticDataFile syntheticDataFile : SyntheticDataFile.values()) {
		// Compute the file name once, rather than re-applying the grabber several times.
		String fileName = syntheticDataPathGrabber.apply(syntheticDataFile).getFileName().toString();
		manifestEntries.add(new DataSetManifestEntry(fileName, syntheticDataFile.getRifFile().getFileType()));
	}
	DataSetManifest manifest = new DataSetManifest(Instant.now(), 0, manifestEntries);
	// Upload the manifest and every file in it.
	PutObjectRequest manifestRequest = DataSetTestUtilities.createPutRequest(bucket, s3KeyPrefix, manifest);
	manifestRequest.setCannedAcl(CannedAccessControlList.PublicRead);
	s3Client.putObject(manifestRequest);
	LOGGER.info("Uploaded: manifest");
	for (SyntheticDataFile syntheticDataFile : SyntheticDataFile.values()) {
		Path filePath = syntheticDataPathGrabber.apply(syntheticDataFile);
		String fileName = filePath.getFileName().toString();
		// Fail with a descriptive message instead of Optional.get()'s bare
		// NoSuchElementException if the matching manifest entry is absent.
		DataSetManifestEntry manifestEntry = manifest.getEntries().stream()
				.filter(e -> e.getName().equals(fileName))
				.findFirst()
				.orElseThrow(() -> new IllegalStateException("No manifest entry found for: " + fileName));
		PutObjectRequest fileRequest = DataSetTestUtilities.createPutRequest(bucket, s3KeyPrefix, manifest, manifestEntry, filePath.toUri().toURL());
		fileRequest.setCannedAcl(CannedAccessControlList.PublicRead);
		s3Client.putObject(fileRequest);
		LOGGER.info("Uploaded: {}", syntheticDataFile.name());
	}
}
use of com.amazonaws.services.s3.model.Filter in project studio by craftercms.
In class AwsS3ServiceImpl, the method listItems.
/**
 * {@inheritDoc}
 */
@Override
@HasPermission(type = DefaultPermission.class, action = "s3 read")
public List<S3Item> listItems(@ValidateStringParam(name = "siteId") @ProtectedResourceId("siteId") String siteId, @ValidateStringParam(name = "profileId") String profileId, @ValidateStringParam(name = "path") String path, @ValidateStringParam(name = "type") String type) throws AwsException {
	S3Profile profile = getProfile(siteId, profileId);
	AmazonS3 client = getS3Client(profile);
	List<S3Item> items = new LinkedList<>();
	Mimetypes mimeTypeResolver = Mimetypes.getInstance();
	// An empty type or the generic "item" filter matches every MIME type.
	MimeType filterType = StringUtils.isEmpty(type) || StringUtils.equals(type, ITEM_FILTER)
			? MimeTypeUtils.ALL
			: new MimeType(type);
	String prefix = StringUtils.isEmpty(path) ? path : normalizePrefix(path);
	ListObjectsV2Request request = new ListObjectsV2Request()
			.withBucketName(profile.getBucketName())
			.withPrefix(prefix)
			.withDelimiter(delimiter);
	ListObjectsV2Result result;
	do {
		result = client.listObjectsV2(request);
		// Common prefixes are the "virtual folders" directly beneath the requested path.
		result.getCommonPrefixes().stream()
				.map(folderKey -> new S3Item(StringUtils.removeEnd(StringUtils.removeStart(folderKey, prefix), delimiter), folderKey, true))
				.forEach(items::add);
		// Objects: skip the prefix placeholder itself and anything whose MIME type is
		// incompatible with the requested filter.
		result.getObjectSummaries().stream()
				.filter(summary -> !StringUtils.equals(summary.getKey(), prefix))
				.filter(summary -> MimeType.valueOf(mimeTypeResolver.getMimetype(summary.getKey())).isCompatibleWith(filterType))
				.map(summary -> new S3Item(StringUtils.removeStart(summary.getKey(), prefix), createUrl(profileId, summary.getKey()), false))
				.forEach(items::add);
		// Continue paging until S3 reports no more results.
		request.setContinuationToken(result.getNextContinuationToken());
	} while (result.isTruncated());
	return items;
}
use of com.amazonaws.services.s3.model.Filter in project amazon-qldb-dmv-sample-java by aws-samples.
In class JournalS3ExportReader, the method readExport.
/**
 * Read the S3 export within a {@link JournalBlock}.
 *
 * @param describeJournalS3ExportResult
 *            The result from the QLDB database describing a journal export.
 * @param amazonS3
 *            The low level S3 client.
 * @return a list of {@link JournalBlock}.
 */
public static List<JournalBlock> readExport(final DescribeJournalS3ExportResult describeJournalS3ExportResult, final AmazonS3 amazonS3) {
	S3ExportConfiguration exportConfiguration = describeJournalS3ExportResult.getExportDescription().getS3ExportConfiguration();
	String bucketName = exportConfiguration.getBucket();
	String exportId = describeJournalS3ExportResult.getExportDescription().getExportId();
	// List every object under the export's prefix so the manifests can be located.
	ListObjectsV2Request listRequest = new ListObjectsV2Request()
			.withBucketName(bucketName)
			.withPrefix(exportConfiguration.getPrefix());
	ListObjectsV2Result listResult = amazonS3.listObjectsV2(listRequest);
	log.info("Found the following objects for list from s3: ");
	listResult.getObjectSummaries().forEach(summary -> log.info(summary.getKey()));
	// Validate initial manifest file was written.
	String expectedManifestKey = exportConfiguration.getPrefix() + exportId + ".started" + ".manifest";
	String initialManifestKey = listResult.getObjectSummaries().stream()
			.filter(summary -> summary.getKey().equalsIgnoreCase(expectedManifestKey))
			.map(S3ObjectSummary::getKey)
			.findFirst()
			.orElseThrow(() -> new IllegalStateException("Initial manifest not found."));
	log.info("Found the initial manifest with key " + initialManifestKey);
	// Find the final manifest file, it should contain the exportId in it.
	String completedManifestFileKey = listResult.getObjectSummaries().stream()
			.filter(summary -> summary.getKey().endsWith("completed.manifest") && (summary.getKey().contains(exportId)))
			.map(S3ObjectSummary::getKey)
			.findFirst()
			.orElseThrow(() -> new IllegalStateException("Completed manifest not found."));
	log.info("Found the completed manifest with key " + completedManifestFileKey);
	// Read manifest file to find data file keys.
	S3Object completedManifestObject = amazonS3.getObject(bucketName, completedManifestFileKey);
	List<String> dataFileKeys = getDataFileKeysFromManifest(completedManifestObject);
	log.info("Found the following keys in the manifest files: " + dataFileKeys);
	List<JournalBlock> journalBlocks = new ArrayList<>();
	for (String dataFileKey : dataFileKeys) {
		log.info("Reading file with S3 key " + dataFileKey + " from bucket: " + bucketName);
		S3Object dataFileObject = amazonS3.getObject(bucketName, dataFileKey);
		List<JournalBlock> fileBlocks = getJournalBlocks(dataFileObject);
		// Sanity-check: the first/last blocks of the file must match the range in its key.
		compareKeyWithContentRange(dataFileKey, fileBlocks.get(0), fileBlocks.get(fileBlocks.size() - 1));
		journalBlocks.addAll(fileBlocks);
	}
	return journalBlocks;
}
Aggregations