Use of gov.cms.bfd.pipeline.ccw.rif.extract.exceptions.ChecksumException in the project beneficiary-fhir-data by CMSgov.
Class DataSetTestUtilities, method createPutRequest.
/**
 * Builds the S3 upload request for a single manifest entry. The object is stored under
 * {@code keyPrefix/<entry name>} and is tagged with an {@code md5chksum} user-metadata value
 * computed from the content found at {@code objectContentsUrl}.
 *
 * @param bucket the {@link Bucket} to place the new object in
 * @param keyPrefix the S3 key prefix to store the new object under
 * @param manifest the {@link DataSetManifest} to create an object for
 * @param manifestEntry the {@link DataSetManifestEntry} to create an object for
 * @param objectContentsUrl a {@link URL} to the data to push as the new object's content
 * @return a {@link PutObjectRequest} for the specified content
 * @throws UncheckedIOException if the content at {@code objectContentsUrl} cannot be read
 * @throws ChecksumException if the MD5 checksum cannot be computed because the digest algorithm
 *     is unavailable
 */
public static PutObjectRequest createPutRequest(
    Bucket bucket,
    String keyPrefix,
    DataSetManifest manifest,
    DataSetManifestEntry manifestEntry,
    URL objectContentsUrl) {
  String objectKey = String.format("%s/%s", keyPrefix, manifestEntry.getName());
  try {
    // If this isn't specified, the AWS API logs annoying warnings.
    int objectContentLength = objectContentsUrl.openConnection().getContentLength();
    ObjectMetadata objectMetadata = new ObjectMetadata();
    objectMetadata.setContentLength(objectContentLength);

    /*
     * Create the md5chksum on the file to be uploaded. The stream opened for hashing is
     * separate from the one handed to the request below, so it is closed here as soon as the
     * checksum has been computed.
     */
    String md5ChkSum;
    try (java.io.InputStream checksumStream = objectContentsUrl.openStream()) {
      md5ChkSum = ManifestEntryDownloadTask.computeMD5ChkSum(checksumStream);
    }
    objectMetadata.addUserMetadata("md5chksum", md5ChkSum);

    // This second stream is passed to the request as the object content; it is not closed here.
    PutObjectRequest request =
        new PutObjectRequest(
            bucket.getName(), objectKey, objectContentsUrl.openStream(), objectMetadata);

    /*
     * Per https://github.com/aws/aws-sdk-java/issues/427, this is
     * required when PUTing objects from an InputStream (as opposed to a
     * File). Without it, was seeing intermittent errors.
     */
    request.getRequestClientOptions().setReadLimit(objectContentLength + 1);
    return request;
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  } catch (NoSuchAlgorithmException e) {
    throw new ChecksumException(
        "NoSuchAlgorithmException on file "
            + manifest.getTimestampText()
            + "/"
            + manifestEntry.getName()
            + " trying to build md5chksum",
        e);
  }
}
Use of gov.cms.bfd.pipeline.ccw.rif.extract.exceptions.ChecksumException in the project beneficiary-fhir-data by CMSgov.
Class ManifestEntryDownloadTask, method call.
/**
 * Downloads this task's manifest entry from S3 into a local temp file and computes an MD5
 * checksum of the downloaded file. When the S3 object carries an {@code md5chksum}
 * user-metadata value, the computed checksum must equal it; objects without that metadata are
 * accepted without verification.
 *
 * @return a {@link ManifestEntryDownloadResult} pairing the manifest entry with the local temp
 *     file it was downloaded to
 * @throws ChecksumException if the object's {@code md5chksum} metadata is present and does not
 *     match the checksum computed from the downloaded file
 * @throws UncheckedIOException if the temp file cannot be created or read
 * @throws AwsFailureException if the AWS client reports a failure
 * @throws BadCodeMonkeyException if the thread is interrupted while waiting for the download
 * @see java.util.concurrent.Callable#call()
 */
@Override
public ManifestEntryDownloadResult call() throws Exception {
  try {
    GetObjectRequest objectRequest =
        new GetObjectRequest(
            options.getS3BucketName(),
            String.format(
                "%s/%s/%s",
                CcwRifLoadJob.S3_PREFIX_PENDING_DATA_SETS,
                manifestEntry.getParentManifest().getTimestampText(),
                manifestEntry.getName()));
    Path localTempFile = Files.createTempFile("data-pipeline-s3-temp", ".rif");

    Timer.Context downloadTimer =
        appMetrics
            .timer(MetricRegistry.name(getClass().getSimpleName(), "downloadSystemTime"))
            .time();
    LOGGER.debug(
        "Downloading '{}' to '{}'...", manifestEntry, localTempFile.toAbsolutePath().toString());
    Download downloadHandle =
        s3TaskManager.getS3TransferManager().download(objectRequest, localTempFile.toFile());
    downloadHandle.waitForCompletion();
    LOGGER.debug(
        "Downloaded '{}' to '{}'.", manifestEntry, localTempFile.toAbsolutePath().toString());
    downloadTimer.close();

    // generate MD5ChkSum value on file just downloaded
    Timer.Context md5ChkSumTimer =
        appMetrics
            .timer(MetricRegistry.name(getClass().getSimpleName(), "md5ChkSumSystemTime"))
            .time();
    String generatedMD5ChkSum;
    // Close the stream once hashing is done so the file handle is not left open.
    try (InputStream downloadedInputStream = new FileInputStream(localTempFile.toString())) {
      generatedMD5ChkSum = ManifestEntryDownloadTask.computeMD5ChkSum(downloadedInputStream);
    }
    md5ChkSumTimer.close();

    String downloadedFileMD5ChkSum =
        downloadHandle.getObjectMetadata().getUserMetaDataOf("md5chksum");
    // TODO Remove null check below once Jira CBBD-368 is completed
    if ((downloadedFileMD5ChkSum != null)
        && (!generatedMD5ChkSum.equals(downloadedFileMD5ChkSum))) {
      throw new ChecksumException(
          "Checksum doesn't match on downloaded file "
              + localTempFile
              + " manifest entry is "
              + manifestEntry.toString());
    }

    return new ManifestEntryDownloadResult(manifestEntry, localTempFile);
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  } catch (AmazonClientException e) {
    throw new AwsFailureException(e);
  } catch (InterruptedException e) {
    // Shouldn't happen, as our apps don't use thread interrupts. Restore the interrupt status
    // anyway so that code further up the stack can still observe it.
    Thread.currentThread().interrupt();
    throw new BadCodeMonkeyException(e);
  }
}
Aggregations