Use of bio.terra.common.exception.PdaoFileCopyException in the project jade-data-repo by DataBiosphere.
The copyFile method of the GcsPdao class.
/**
 * Copies the source file referenced by {@code fileLoadModel} into the dataset's GCS bucket
 * and returns metadata describing the new target object.
 *
 * @param dataset dataset that owns the file; its id forms the first path segment of the target
 * @param fileLoadModel load request carrying the {@code gs://} source path of the file to copy
 * @param fileId identifier used as the final path segment of the target object
 * @param bucketResource target bucket to copy into
 * @return an {@link FSFileInfo} populated with the target object's path, checksums, size,
 *     creation time, and owning bucket resource id
 * @throws PdaoFileCopyException if GCS reports a failure during the copy (transient errors
 *     or access denial)
 * @throws PdaoException if the constructed gs URI is malformed (should not happen)
 */
public FSFileInfo copyFile(Dataset dataset, FileLoadModel fileLoadModel, String fileId, GoogleBucketResource bucketResource) {
  Storage storage = storageForBucket(bucketResource);
  Blob sourceBlob = getBlobFromGsPath(storage, fileLoadModel.getSourcePath());

  // Target object path within the bucket is <dataset-id>/<file-id>
  String targetPath = dataset.getId().toString() + "/" + fileId;

  try {
    // The GCS documentation is ambiguous about whether copying chunk-by-chunk matters,
    // but large files have been observed to time out on a single-shot copy. The timeouts
    // were exported to application.properties for tuning, and the copy is driven
    // chunk-by-chunk here until the writer reports completion.
    CopyWriter copyWriter = sourceBlob.copyTo(BlobId.of(bucketResource.getName(), targetPath));
    while (!copyWriter.isDone()) {
      copyWriter.copyChunk();
    }
    Blob targetBlob = copyWriter.getResult();

    // GCS computes MD5 per component, so for multi-component (composite) objects the
    // stored MD5 does not validate the whole object's contents. Only surface the MD5
    // when there is a single component (or no component count at all).
    // See https://cloud.google.com/storage/docs/hashes-etags
    Integer componentCount = targetBlob.getComponentCount();
    String checksumMd5 =
        (componentCount == null || componentCount == 1) ? targetBlob.getMd5ToHexString() : null;

    // getCreateTime() returns an undocumented Long; empirically it is standard POSIX
    // milliseconds since Jan 1, 1970 — TODO confirm against the client library docs.
    Instant createTime = Instant.ofEpochMilli(targetBlob.getCreateTime());

    URI gspath = new URI("gs", bucketResource.getName(), "/" + targetPath, null, null);

    return new FSFileInfo()
        .fileId(fileId)
        .createdDate(createTime.toString())
        .gspath(gspath.toString())
        .checksumCrc32c(targetBlob.getCrc32cToHexString())
        .checksumMd5(checksumMd5)
        .size(targetBlob.getSize())
        .bucketResourceId(bucketResource.getResourceId().toString());
  } catch (StorageException ex) {
    // Covers both flaky/transient Google errors and hard failures such as access denial.
    throw new PdaoFileCopyException("File ingest failed", ex);
  } catch (URISyntaxException ex) {
    throw new PdaoException("Bad URI of our own making", ex);
  }
}
Aggregations