Search in sources:

Example 1 with Filter

use of com.amazonaws.services.s3.model.Filter in project beneficiary-fhir-data by CMSgov.

the class DataSetSubsetter method createSubset.

/**
 * Creates a subset of the specified input {@link RifFile}s, writing out the results via the
 * {@link CSVPrinter}s provided by the specified {@link IDataSetWriter}.
 *
 * <p>The beneficiary files are scanned first to collect every beneficiary ID, a random sample of
 * {@code beneficiaryCount} IDs is chosen, and then every input file is copied record-by-record,
 * keeping only the records whose beneficiary ID is in the sample.
 *
 * @param output the {@link IDataSetWriter} to get the needed {@link CSVPrinter}s from
 * @param beneficiaryCount the target beneficiary count of the copy/subset to create
 * @param rifFiles the input {@link RifFile}s to be subsetted
 * @throws IOException Any {@link IOException}s encountered will be bubbled up.
 * @throws IllegalArgumentException if {@code beneficiaryCount} is negative or larger than the
 *     number of beneficiary IDs found in the input
 * @throws IllegalStateException if the same beneficiary ID is encountered more than once while
 *     scanning the beneficiary files
 * @throws UncheckedIOException if writing a selected record to its {@link CSVPrinter} fails
 */
public static void createSubset(IDataSetWriter output, int beneficiaryCount, List<RifFile> rifFiles) throws IOException {
    LOGGER.info("Scanning beneficiary IDs...");
    List<RifFile> beneficiaryFiles = rifFiles.stream().filter(f -> f.getFileType() == RifFileType.BENEFICIARY).collect(Collectors.toList());
    // The list keeps a shuffle-able ordering; the set gives constant-time duplicate detection.
    List<String> beneficiaryIds = new ArrayList<>();
    Set<String> seenBeneficiaryIds = new HashSet<>();
    for (RifFile beneficiaryFile : beneficiaryFiles) {
        // try-with-resources ensures the parser is closed even if a duplicate aborts the scan.
        try (CSVParser parser = RifParsingUtils.createCsvParser(beneficiaryFile)) {
            parser.forEach(r -> {
                String beneficiaryId = r.get(BeneficiaryColumn.BENE_ID);
                if (!seenBeneficiaryIds.add(beneficiaryId))
                    throw new IllegalStateException("Duplicate beneficiary ID encountered in RIF file: " + beneficiaryFile.getDisplayName());
                beneficiaryIds.add(beneficiaryId);
            });
        }
    }
    LOGGER.info("Scanned beneficiary IDs.");

    // Fail with a descriptive error rather than an IndexOutOfBoundsException from the loop below.
    if (beneficiaryCount < 0 || beneficiaryCount > beneficiaryIds.size())
        throw new IllegalArgumentException("Requested beneficiary count (" + beneficiaryCount + ") must be between 0 and the number of available beneficiaries (" + beneficiaryIds.size() + ").");

    Set<String> selectedBeneficiaryIds = new HashSet<>(beneficiaryCount);
    Collections.shuffle(beneficiaryIds);
    for (int i = 0; i < beneficiaryCount; i++) selectedBeneficiaryIds.add(beneficiaryIds.get(i));
    LOGGER.info("Selected '{}' random beneficiary IDs.", beneficiaryCount);

    // Each RIF file type names its beneficiary ID column via its own column enum.
    Map<RifFileType, Enum<?>> beneficiaryColumnByFileType = new HashMap<>();
    beneficiaryColumnByFileType.put(RifFileType.BENEFICIARY, BeneficiaryColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.CARRIER, CarrierClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.DME, DMEClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.HHA, HHAClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.HOSPICE, HospiceClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.INPATIENT, InpatientClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.OUTPATIENT, OutpatientClaimColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.PDE, PartDEventColumn.BENE_ID);
    beneficiaryColumnByFileType.put(RifFileType.SNF, SNFClaimColumn.BENE_ID);

    for (RifFile rifFile : rifFiles) {
        LOGGER.info("Subsetting RIF file: '{}'...", rifFile.getDisplayName());
        CSVPrinter rifFilePrinter = output.getPrinter(rifFile.getFileType());
        // The column lookup is the same for every record in the file, so do it once.
        Enum<?> beneficiaryColumn = beneficiaryColumnByFileType.get(rifFile.getFileType());
        try (CSVParser parser = RifParsingUtils.createCsvParser(rifFile)) {
            /*
             * When we created the CSVPrinter, we told it to skip the header.
             * That ensures that we don't write out a header until we've started
             * reading the file and know what it is. Here, we print a "fake"
             * first record with the header, as read from the input file.
             * Previously, we'd been having the CSVPrinter create a header based
             * on our RIF column enums, but that leads to us propagating errors
             * in those enums to the sample files. It's better to let the files
             * tell us what their headers are.
             */
            rifFilePrinter.printRecord(parser.getHeaderMap().entrySet().stream().sorted(Map.Entry.comparingByValue()).map(e -> e.getKey()).toArray());
            parser.forEach(r -> {
                String beneficiaryId = r.get(beneficiaryColumn);
                if (selectedBeneficiaryIds.contains(beneficiaryId))
                    try {
                        rifFilePrinter.printRecord(r);
                    } catch (IOException e) {
                        // The lambda cannot throw checked exceptions, so wrap and rethrow.
                        throw new UncheckedIOException(e);
                    }
            });
        }
    }
    LOGGER.info("Subsetted all RIF files.");
}
Also used : Arrays(java.util.Arrays) CarrierClaimColumn(gov.cms.bfd.model.rif.CarrierClaimColumn) RifFileType(gov.cms.bfd.model.rif.RifFileType) S3RifFile(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3RifFile) S3Utilities(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3Utilities) LoggerFactory(org.slf4j.LoggerFactory) SNFClaimColumn(gov.cms.bfd.model.rif.SNFClaimColumn) HHAClaimColumn(gov.cms.bfd.model.rif.HHAClaimColumn) CSVFormat(org.apache.commons.csv.CSVFormat) Map(java.util.Map) CSVParser(org.apache.commons.csv.CSVParser) Path(java.nio.file.Path) TransferManagerBuilder(com.amazonaws.services.s3.transfer.TransferManagerBuilder) InpatientClaimColumn(gov.cms.bfd.model.rif.InpatientClaimColumn) Set(java.util.Set) RifFile(gov.cms.bfd.model.rif.RifFile) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) JAXBException(javax.xml.bind.JAXBException) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) UncheckedJaxbException(gov.cms.bfd.sharedutils.exceptions.UncheckedJaxbException) Entry(java.util.Map.Entry) RifParsingUtils(gov.cms.bfd.model.rif.parse.RifParsingUtils) AmazonClientException(com.amazonaws.AmazonClientException) BeneficiaryColumn(gov.cms.bfd.model.rif.BeneficiaryColumn) CSVPrinter(org.apache.commons.csv.CSVPrinter) TransferManager(com.amazonaws.services.s3.transfer.TransferManager) LocalRifFile(gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile) OutpatientClaimColumn(gov.cms.bfd.model.rif.OutpatientClaimColumn) HospiceClaimColumn(gov.cms.bfd.model.rif.HospiceClaimColumn) Marshaller(javax.xml.bind.Marshaller) HashMap(java.util.HashMap) ExtractionOptions(gov.cms.bfd.pipeline.ccw.rif.extract.ExtractionOptions) Download(com.amazonaws.services.s3.transfer.Download) ArrayList(java.util.ArrayList) DataSetManifestEntry(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry) HashSet(java.util.HashSet) TestDataSetLocation(gov.cms.bfd.model.rif.samples.TestDataSetLocation) AmazonS3(com.amazonaws.services.s3.AmazonS3) 
PartDEventColumn(gov.cms.bfd.model.rif.PartDEventColumn) JAXBContext(javax.xml.bind.JAXBContext) Unmarshaller(javax.xml.bind.Unmarshaller) Logger(org.slf4j.Logger) Files(java.nio.file.Files) FileWriter(java.io.FileWriter) IOException(java.io.IOException) DataSetManifest(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest) Paths(java.nio.file.Paths) DMEClaimColumn(gov.cms.bfd.model.rif.DMEClaimColumn) Collections(java.util.Collections) S3RifFile(gov.cms.bfd.pipeline.ccw.rif.extract.s3.S3RifFile) RifFile(gov.cms.bfd.model.rif.RifFile) LocalRifFile(gov.cms.bfd.pipeline.ccw.rif.extract.LocalRifFile) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) UncheckedIOException(java.io.UncheckedIOException) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) RifFileType(gov.cms.bfd.model.rif.RifFileType) CSVPrinter(org.apache.commons.csv.CSVPrinter) CSVParser(org.apache.commons.csv.CSVParser) HashSet(java.util.HashSet)

Example 2 with Filter

use of com.amazonaws.services.s3.model.Filter in project beneficiary-fhir-data by CMSgov.

the class SyntheticDataUploader method uploadSyntheticData.

/**
 * Builds a {@link DataSetManifest} covering every {@link SyntheticDataFile}, then uploads the
 * manifest followed by each file to the test-data S3 bucket with a public-read ACL.
 *
 * @param s3Client the {@link AmazonS3} client to use
 * @param s3KeyPrefix the S3 key prefix to upload all objects under/into
 * @param syntheticDataPathGrabber the {@link Function} that returns the {@link Path} to upload
 *     from, for a given {@link SyntheticDataFile}
 * @throws MalformedURLException Any {@link MalformedURLException}s encountered will be bubbled
 *     up.
 */
private static void uploadSyntheticData(AmazonS3 s3Client, String s3KeyPrefix, Function<SyntheticDataFile, Path> syntheticDataPathGrabber) throws MalformedURLException {
    Bucket bucket = new Bucket(TestDataSetLocation.S3_BUCKET_TEST_DATA);

    // Describe every synthetic data file in a single manifest.
    List<DataSetManifestEntry> manifestEntries = new LinkedList<>();
    for (SyntheticDataFile dataFile : SyntheticDataFile.values()) {
        String entryName = syntheticDataPathGrabber.apply(dataFile).getFileName().toString();
        manifestEntries.add(new DataSetManifestEntry(entryName, dataFile.getRifFile().getFileType()));
    }
    DataSetManifest manifest = new DataSetManifest(Instant.now(), 0, manifestEntries);

    // The manifest goes up first, then each file it lists.
    PutObjectRequest manifestUpload = DataSetTestUtilities.createPutRequest(bucket, s3KeyPrefix, manifest);
    manifestUpload.setCannedAcl(CannedAccessControlList.PublicRead);
    s3Client.putObject(manifestUpload);
    LOGGER.info("Uploaded: manifest");

    for (SyntheticDataFile dataFile : SyntheticDataFile.values()) {
        // Look the entry up again by file name, exactly as it was registered above.
        DataSetManifestEntry matchingEntry = manifest.getEntries().stream()
                .filter(entry -> entry.getName().equals(syntheticDataPathGrabber.apply(dataFile).getFileName().toString()))
                .findFirst()
                .get();
        Path localPath = syntheticDataPathGrabber.apply(dataFile);
        PutObjectRequest fileUpload = DataSetTestUtilities.createPutRequest(bucket, s3KeyPrefix, manifest, matchingEntry, localPath.toUri().toURL());
        fileUpload.setCannedAcl(CannedAccessControlList.PublicRead);
        s3Client.putObject(fileUpload);
        LOGGER.info("Uploaded: {}", dataFile.name());
    }
}
Also used : SyntheticDataFile(gov.cms.bfd.pipeline.ccw.rif.extract.synthetic.SyntheticDataFixer.SyntheticDataFile) DataSetManifest(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest) Bucket(com.amazonaws.services.s3.model.Bucket) DataSetManifestEntry(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry) LinkedList(java.util.LinkedList) PutObjectRequest(com.amazonaws.services.s3.model.PutObjectRequest)

Example 3 with Filter

use of com.amazonaws.services.s3.model.Filter in project beneficiary-fhir-data by CMSgov.

the class SyntheticDataUploader2 method uploadSyntheticData.

/**
 * Uploads the synthetic data set to the test-data S3 bucket: first a {@link DataSetManifest}
 * listing every {@link SyntheticDataFile}, then each of those files. All uploaded objects are
 * given a public-read ACL.
 *
 * @param s3Client the {@link AmazonS3} client to use
 * @param s3KeyPrefix the S3 key prefix to upload all objects under/into
 * @param syntheticDataPathGrabber the {@link Function} that returns the {@link Path} to upload
 *     from, for a given {@link SyntheticDataFile}
 * @throws MalformedURLException Any {@link MalformedURLException}s encountered will be bubbled
 *     up.
 */
private static void uploadSyntheticData(AmazonS3 s3Client, String s3KeyPrefix, Function<SyntheticDataFile, Path> syntheticDataPathGrabber) throws MalformedURLException {
    Bucket targetBucket = new Bucket(TestDataSetLocation.S3_BUCKET_TEST_DATA);

    // One manifest entry per synthetic data file, keyed by the file's name on disk.
    List<DataSetManifestEntry> entries = new LinkedList<>();
    for (SyntheticDataFile source : SyntheticDataFile.values()) {
        String sourceFileName = syntheticDataPathGrabber.apply(source).getFileName().toString();
        entries.add(new DataSetManifestEntry(sourceFileName, source.getRifFile().getFileType()));
    }
    DataSetManifest manifest = new DataSetManifest(Instant.now(), 0, entries);

    // Publish the manifest before any of the files it references.
    PutObjectRequest putManifest = DataSetTestUtilities.createPutRequest(targetBucket, s3KeyPrefix, manifest);
    putManifest.setCannedAcl(CannedAccessControlList.PublicRead);
    s3Client.putObject(putManifest);
    LOGGER.info("Uploaded: manifest");

    for (SyntheticDataFile source : SyntheticDataFile.values()) {
        // Find the manifest entry whose name matches this file's name on disk.
        DataSetManifestEntry entryForSource = manifest.getEntries().stream()
                .filter(candidate -> candidate.getName().equals(syntheticDataPathGrabber.apply(source).getFileName().toString()))
                .findFirst()
                .get();
        Path sourcePath = syntheticDataPathGrabber.apply(source);
        PutObjectRequest putFile = DataSetTestUtilities.createPutRequest(targetBucket, s3KeyPrefix, manifest, entryForSource, sourcePath.toUri().toURL());
        putFile.setCannedAcl(CannedAccessControlList.PublicRead);
        s3Client.putObject(putFile);
        LOGGER.info("Uploaded: {}", source.name());
    }
}
Also used : SyntheticDataFile(gov.cms.bfd.pipeline.ccw.rif.extract.synthetic.SyntheticDataFixer2.SyntheticDataFile) DataSetManifest(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest) Bucket(com.amazonaws.services.s3.model.Bucket) DataSetManifestEntry(gov.cms.bfd.pipeline.ccw.rif.extract.s3.DataSetManifest.DataSetManifestEntry) LinkedList(java.util.LinkedList) PutObjectRequest(com.amazonaws.services.s3.model.PutObjectRequest)

Example 4 with Filter

use of com.amazonaws.services.s3.model.Filter in project studio by craftercms.

the class AwsS3ServiceImpl method listItems.

/**
 * {@inheritDoc}
 *
 * <p>Lists the bucket of the resolved profile one page at a time. Every common prefix is added
 * as a folder item, and every object whose key differs from the prefix and whose MIME type is
 * compatible with the requested {@code type} is added as a file item.
 */
@Override
@HasPermission(type = DefaultPermission.class, action = "s3 read")
public List<S3Item> listItems(@ValidateStringParam(name = "siteId") @ProtectedResourceId("siteId") String siteId, @ValidateStringParam(name = "profileId") String profileId, @ValidateStringParam(name = "path") String path, @ValidateStringParam(name = "type") String type) throws AwsException {
    S3Profile profile = getProfile(siteId, profileId);
    AmazonS3 client = getS3Client(profile);
    List<S3Item> items = new LinkedList<>();
    Mimetypes mimetypes = Mimetypes.getInstance();

    // An empty type, or the generic item filter, matches every MIME type.
    boolean acceptAnyType = StringUtils.isEmpty(type) || StringUtils.equals(type, ITEM_FILTER);
    MimeType filterType = acceptAnyType ? MimeTypeUtils.ALL : new MimeType(type);
    // An empty path is passed through as-is; anything else is normalized first.
    String prefix = StringUtils.isEmpty(path) ? path : normalizePrefix(path);

    ListObjectsV2Request request = new ListObjectsV2Request()
            .withBucketName(profile.getBucketName())
            .withPrefix(prefix)
            .withDelimiter(delimiter);

    while (true) {
        ListObjectsV2Result page = client.listObjectsV2(request);

        // Common prefixes become folder items, named relative to the requested prefix.
        for (String commonPrefix : page.getCommonPrefixes()) {
            String folderName = StringUtils.removeEnd(StringUtils.removeStart(commonPrefix, prefix), delimiter);
            items.add(new S3Item(folderName, commonPrefix, true));
        }

        // Objects become file items; the prefix's own key and incompatible types are skipped.
        page.getObjectSummaries().stream()
                .filter(summary -> !StringUtils.equals(summary.getKey(), prefix))
                .filter(summary -> MimeType.valueOf(mimetypes.getMimetype(summary.getKey())).isCompatibleWith(filterType))
                .map(summary -> new S3Item(StringUtils.removeStart(summary.getKey(), prefix), createUrl(profileId, summary.getKey()), false))
                .forEach(items::add);

        request.setContinuationToken(page.getNextContinuationToken());
        if (!page.isTruncated()) {
            break;
        }
    }
    return items;
}
Also used : S3Item(org.craftercms.studio.model.aws.s3.S3Item) AwsUtils(org.craftercms.studio.impl.v1.service.aws.AwsUtils) S3Item(org.craftercms.studio.model.aws.s3.S3Item) HasPermission(org.craftercms.commons.security.permissions.annotations.HasPermission) AbstractAwsService(org.craftercms.studio.api.v1.service.aws.AbstractAwsService) AwsS3Service(org.craftercms.studio.api.v2.service.aws.s3.AwsS3Service) S3Profile(org.craftercms.commons.config.profiles.aws.S3Profile) StringUtils.stripStart(org.apache.commons.lang3.StringUtils.stripStart) ProtectedResourceId(org.craftercms.commons.security.permissions.annotations.ProtectedResourceId) MimeTypeUtils(org.springframework.util.MimeTypeUtils) StringUtils.appendIfMissing(org.apache.commons.lang3.StringUtils.appendIfMissing) StringUtils(org.apache.commons.lang3.StringUtils) ListObjectsV2Result(com.amazonaws.services.s3.model.ListObjectsV2Result) MimeType(org.springframework.util.MimeType) ListObjectsV2Request(com.amazonaws.services.s3.model.ListObjectsV2Request) AwsException(org.craftercms.studio.api.v1.exception.AwsException) List(java.util.List) Mimetypes(com.amazonaws.services.s3.internal.Mimetypes) S3ClientCachingFactory(org.craftercms.commons.aws.S3ClientCachingFactory) AmazonS3(com.amazonaws.services.s3.AmazonS3) DefaultPermission(org.craftercms.commons.security.permissions.DefaultPermission) Required(org.springframework.beans.factory.annotation.Required) ValidateStringParam(org.craftercms.commons.validation.annotations.param.ValidateStringParam) LinkedList(java.util.LinkedList) InputStream(java.io.InputStream) AmazonS3(com.amazonaws.services.s3.AmazonS3) ListObjectsV2Request(com.amazonaws.services.s3.model.ListObjectsV2Request) ListObjectsV2Result(com.amazonaws.services.s3.model.ListObjectsV2Result) Mimetypes(com.amazonaws.services.s3.internal.Mimetypes) S3Profile(org.craftercms.commons.config.profiles.aws.S3Profile) LinkedList(java.util.LinkedList) MimeType(org.springframework.util.MimeType) 
HasPermission(org.craftercms.commons.security.permissions.annotations.HasPermission)

Example 5 with Filter

use of com.amazonaws.services.s3.model.Filter in project amazon-qldb-dmv-sample-java by aws-samples.

the class JournalS3ExportReader method readExport.

/**
 * Read the S3 export within a {@link JournalBlock}.
 *
 * <p>All objects under the export's bucket and prefix are listed (following continuation tokens
 * so that listings spanning multiple pages are fully read), the initial and completed manifests
 * are located among them, and every data file named in the completed manifest is then fetched
 * and parsed into journal blocks.
 *
 * @param describeJournalS3ExportResult
 *              The result from the QLDB database describing a journal export.
 * @param amazonS3
 *              The low level S3 client.
 * @return a list of {@link JournalBlock}.
 * @throws IllegalStateException if the initial or the completed manifest is not found in the
 *              listing.
 */
public static List<JournalBlock> readExport(final DescribeJournalS3ExportResult describeJournalS3ExportResult, final AmazonS3 amazonS3) {
    S3ExportConfiguration exportConfiguration = describeJournalS3ExportResult.getExportDescription().getS3ExportConfiguration();
    ListObjectsV2Request listObjectsRequest = new ListObjectsV2Request().withBucketName(exportConfiguration.getBucket()).withPrefix(exportConfiguration.getPrefix());
    // A single listObjectsV2 call returns only one page of keys; keep requesting pages until
    // the listing is no longer truncated so a manifest on a later page is not missed.
    List<S3ObjectSummary> objectSummaries = new ArrayList<>();
    ListObjectsV2Result listObjectsV2Result;
    do {
        listObjectsV2Result = amazonS3.listObjectsV2(listObjectsRequest);
        objectSummaries.addAll(listObjectsV2Result.getObjectSummaries());
        listObjectsRequest.setContinuationToken(listObjectsV2Result.getNextContinuationToken());
    } while (listObjectsV2Result.isTruncated());
    log.info("Found the following objects for list from s3: ");
    objectSummaries.forEach(s3ObjectSummary -> log.info(s3ObjectSummary.getKey()));
    // Validate initial manifest file was written.
    String expectedManifestKey = exportConfiguration.getPrefix() + describeJournalS3ExportResult.getExportDescription().getExportId() + ".started" + ".manifest";
    String initialManifestKey = objectSummaries.stream().filter(s3ObjectSummary -> s3ObjectSummary.getKey().equalsIgnoreCase(expectedManifestKey)).map(S3ObjectSummary::getKey).findFirst().orElseThrow(() -> new IllegalStateException("Initial manifest not found."));
    log.info("Found the initial manifest with key " + initialManifestKey);
    // Find the final manifest file, it should contain the exportId in it.
    String completedManifestFileKey = objectSummaries.stream().filter(s3ObjectSummary -> s3ObjectSummary.getKey().endsWith("completed.manifest") && (s3ObjectSummary.getKey().contains(describeJournalS3ExportResult.getExportDescription().getExportId()))).map(S3ObjectSummary::getKey).findFirst().orElseThrow(() -> new IllegalStateException("Completed manifest not found."));
    log.info("Found the completed manifest with key " + completedManifestFileKey);
    // Read manifest file to find data file keys.
    S3Object completedManifestObject = amazonS3.getObject(exportConfiguration.getBucket(), completedManifestFileKey);
    List<String> dataFileKeys = getDataFileKeysFromManifest(completedManifestObject);
    log.info("Found the following keys in the manifest files: " + dataFileKeys);
    List<JournalBlock> journalBlocks = new ArrayList<>();
    for (String key : dataFileKeys) {
        log.info("Reading file with S3 key " + key + " from bucket: " + exportConfiguration.getBucket());
        S3Object s3Object = amazonS3.getObject(exportConfiguration.getBucket(), key);
        List<JournalBlock> blocks = getJournalBlocks(s3Object);
        // Check the first and last block of the file against the key it was read from.
        compareKeyWithContentRange(key, blocks.get(0), blocks.get(blocks.size() - 1));
        journalBlocks.addAll(blocks);
    }
    return journalBlocks;
}
Also used : JournalBlock(software.amazon.qldb.tutorial.qldb.JournalBlock) Logger(org.slf4j.Logger) IonSystem(com.amazon.ion.IonSystem) IonReaderBuilder(com.amazon.ion.system.IonReaderBuilder) LoggerFactory(org.slf4j.LoggerFactory) IOException(java.io.IOException) IonList(com.amazon.ion.IonList) DescribeJournalS3ExportResult(com.amazonaws.services.qldb.model.DescribeJournalS3ExportResult) ListObjectsV2Result(com.amazonaws.services.s3.model.ListObjectsV2Result) ArrayList(java.util.ArrayList) IonStruct(com.amazon.ion.IonStruct) ListObjectsV2Request(com.amazonaws.services.s3.model.ListObjectsV2Request) List(java.util.List) IonReader(com.amazon.ion.IonReader) IonType(com.amazon.ion.IonType) IonSystemBuilder(com.amazon.ion.system.IonSystemBuilder) S3Object(com.amazonaws.services.s3.model.S3Object) S3ExportConfiguration(com.amazonaws.services.qldb.model.S3ExportConfiguration) AmazonS3(com.amazonaws.services.s3.AmazonS3) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) IonString(com.amazon.ion.IonString) ListObjectsV2Request(com.amazonaws.services.s3.model.ListObjectsV2Request) ListObjectsV2Result(com.amazonaws.services.s3.model.ListObjectsV2Result) ArrayList(java.util.ArrayList) S3ExportConfiguration(com.amazonaws.services.qldb.model.S3ExportConfiguration) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) IonString(com.amazon.ion.IonString) S3Object(com.amazonaws.services.s3.model.S3Object) JournalBlock(software.amazon.qldb.tutorial.qldb.JournalBlock)

Aggregations

Filter (com.amazonaws.services.ec2.model.Filter)96 ArrayList (java.util.ArrayList)70 List (java.util.List)52 Collectors (java.util.stream.Collectors)46 IOException (java.io.IOException)41 HashMap (java.util.HashMap)38 Map (java.util.Map)35 AmazonS3 (com.amazonaws.services.s3.AmazonS3)34 Set (java.util.Set)31 DescribeInstancesRequest (com.amazonaws.services.ec2.model.DescribeInstancesRequest)30 S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary)27 Instance (com.amazonaws.services.ec2.model.Instance)26 HashSet (java.util.HashSet)26 Reservation (com.amazonaws.services.ec2.model.Reservation)24 Collections (java.util.Collections)23 DescribeInstancesResult (com.amazonaws.services.ec2.model.DescribeInstancesResult)21 ObjectListing (com.amazonaws.services.s3.model.ObjectListing)21 DescribeSubnetsRequest (com.amazonaws.services.ec2.model.DescribeSubnetsRequest)20 Entry (java.util.Map.Entry)20 Tag (com.amazonaws.services.ec2.model.Tag)18