use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.
the class AddReplaceFileHelper method setNewlyAddedFiles.
/**
* We want the version of the newly added file that has an id set
*
* TODO: This is inefficient/expensive. Need to redo it in a sane way
* - e.g. Query to find
* (1) latest dataset version in draft
* (2) pick off files that are NOT released
* (3) iterate through only those files
* - or an alternate/better version
*
* @param datafiles the final list of newly added files to match against the draft version
*/
private void setNewlyAddedFiles(List<DataFile> datafiles) {
if (hasError()) {
return;
}
// Init. newly added file list
newlyAddedFiles = new ArrayList<>();
newlyAddedFileMetadatas = new ArrayList<>();
// Loop of ugliness... but we expect only 1 to 4 files in the final file list
List<FileMetadata> latestFileMetadatas = dataset.getEditVersion().getFileMetadatas();
for (DataFile newlyAddedFile : finalFileList) {
for (FileMetadata fm : latestFileMetadatas) {
if (newlyAddedFile.getChecksumValue().equals(fm.getDataFile().getChecksumValue())) {
if (newlyAddedFile.getStorageIdentifier().equals(fm.getDataFile().getStorageIdentifier())) {
newlyAddedFiles.add(fm.getDataFile());
newlyAddedFileMetadatas.add(fm);
}
}
}
}
/*
newlyAddedFile = df;
for (FileMetadata fm : dataset.getEditVersion().getFileMetadatas()){
// Find a file where the checksum value and identifiers are the same..
//
if (newlyAddedFile.getChecksumValue().equals(fm.getDataFile().getChecksumValue())){
if (newlyAddedFile.getStorageIdentifier().equals(fm.getDataFile().getStorageIdentifier())){
newlyAddedFile = fm.getDataFile();
break;
}
}
}
*/
}
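The TODO above asks for a saner version. A minimal sketch of one alternative, assuming the same fields (dataset, newlyAddedFiles, newlyAddedFileMetadatas) are in scope; the name setNewlyAddedFilesIndexed and the java.util imports are assumptions for illustration, not part of AddReplaceFileHelper:
// Hypothetical sketch: index the draft version's FileMetadatas by storage
// identifier once, then match each newly added file in constant time,
// instead of the nested loop above.
private void setNewlyAddedFilesIndexed(List<DataFile> finalFileList) {
    Map<String, FileMetadata> byStorageId = new HashMap<>();
    for (FileMetadata fm : dataset.getEditVersion().getFileMetadatas()) {
        byStorageId.put(fm.getDataFile().getStorageIdentifier(), fm);
    }
    for (DataFile added : finalFileList) {
        FileMetadata fm = byStorageId.get(added.getStorageIdentifier());
        if (fm != null && added.getChecksumValue().equals(fm.getDataFile().getChecksumValue())) {
            newlyAddedFiles.add(fm.getDataFile());
            newlyAddedFileMetadatas.add(fm);
        }
    }
}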
use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.
the class SwiftAccessIO method initializeSwiftFileObject.
private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxItemTag) throws IOException {
String swiftEndPoint = null;
String swiftContainerName = null;
String swiftFileName = null;
StoredObject fileObject;
String storageIdentifier = dvObject.getStorageIdentifier();
if (dvObject instanceof DataFile) {
Dataset owner = this.getDataFile().getOwner();
if (storageIdentifier.startsWith("swift://")) {
// This is a call on an already existing swift object.
String[] swiftStorageTokens = storageIdentifier.substring(8).split(":", 3);
if (swiftStorageTokens.length != 3) {
// bad storage identifier
throw new IOException("SwiftAccessIO: invalid swift storage token: " + storageIdentifier);
}
swiftEndPoint = swiftStorageTokens[0];
swiftContainerName = swiftStorageTokens[1];
swiftFileName = swiftStorageTokens[2];
if (StringUtil.isEmpty(swiftEndPoint) || StringUtil.isEmpty(swiftContainerName) || StringUtil.isEmpty(swiftFileName)) {
// All three parts must be non-empty for a valid storage identifier.
throw new IOException("SwiftAccessIO: invalid swift storage token: " + storageIdentifier);
}
if (auxItemTag != null) {
swiftFileName = swiftFileName.concat("." + auxItemTag);
}
} else if (this.isReadAccess) {
// This is not a swift-stored object!
throw new IOException("IO driver mismatch: SwiftAccessIO called on a non-swift stored object.");
} else if (this.isWriteAccess) {
Properties p = getSwiftProperties();
swiftEndPoint = p.getProperty("swift.default.endpoint");
// swiftFolderPath = dataFile.getOwner().getDisplayName();
String swiftFolderPathSeparator = "-";
String authorityNoSlashes = owner.getAuthority().replace(owner.getDoiSeparator(), swiftFolderPathSeparator);
swiftFolderPath = owner.getProtocol() + swiftFolderPathSeparator + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + swiftFolderPathSeparator + owner.getIdentifier();
swiftFileName = storageIdentifier;
// setSwiftContainerName(swiftFolderPath);
// swiftFileName = dataFile.getDisplayName();
// Storage Identifier is now updated after the object is uploaded on Swift.
dvObject.setStorageIdentifier("swift://" + swiftEndPoint + ":" + swiftFolderPath + ":" + swiftFileName);
} else {
throw new IOException("SwiftAccessIO: unknown access mode.");
}
} else if (dvObject instanceof Dataset) {
Dataset dataset = this.getDataset();
if (storageIdentifier.startsWith("swift://")) {
// This is a call on an already existing swift object.
// TODO: determine how the storage identifier will give us this info
String[] swiftStorageTokens = storageIdentifier.substring(8).split(":", 3);
// The number of tokens should be two, because there is no main file.
if (swiftStorageTokens.length != 2) {
// bad storage identifier
throw new IOException("SwiftAccessIO: invalid swift storage token: " + storageIdentifier);
}
swiftEndPoint = swiftStorageTokens[0];
swiftContainerName = swiftStorageTokens[1];
// We will not have a file name, just an aux tag
if (auxItemTag != null) {
swiftFileName = auxItemTag;
} else {
throw new IOException("Dataset related auxillary files require an auxItemTag");
}
if (StringUtil.isEmpty(swiftEndPoint) || StringUtil.isEmpty(swiftContainerName) || StringUtil.isEmpty(swiftFileName)) {
// All of these parts must be non-empty for a valid storage identifier.
throw new IOException("SwiftAccessIO: invalid swift storage token: " + storageIdentifier);
}
} else if (this.isReadAccess) {
// This is not a swift-stored object!
throw new IOException("IO driver mismatch: SwiftAccessIO called on a non-swift stored object.");
} else if (this.isWriteAccess) {
Properties p = getSwiftProperties();
swiftEndPoint = p.getProperty("swift.default.endpoint");
String swiftFolderPathSeparator = "-";
String authorityNoSlashes = dataset.getAuthority().replace(dataset.getDoiSeparator(), swiftFolderPathSeparator);
swiftFolderPath = dataset.getProtocol() + swiftFolderPathSeparator + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + swiftFolderPathSeparator + dataset.getIdentifier();
swiftFileName = auxItemTag;
dvObject.setStorageIdentifier("swift://" + swiftEndPoint + ":" + swiftFolderPath);
} else {
throw new IOException("SwiftAccessIO: unknown access mode.");
}
} else {
// For future scope, in case Dataverse (collection) objects are ever stored in swift storage containers.
throw new FileNotFoundException("Error initializing swift object");
}
if (this.account == null) {
account = authenticateWithSwift(swiftEndPoint);
}
/*
The container created is the swiftEndPoint concatenated with the
swiftContainerName property. Creating containers with certain names
throws an 'Unable to create container' error on OpenStack.
Any datafile with an http://rdgw storage identifier, i.e. one already
present on the Object Store service endpoint, only needs a container
look-up using just swiftContainerName, which is the concatenated name.
In the future, a container per endpoint could be created, with an
Object Store pseudo-folder for every other swiftContainerName; that
is not yet provided by the JOSS Java swift library.
*/
if (storageIdentifier.startsWith("swift://")) {
// An existing swift object; the container must already exist as well.
this.swiftContainer = account.getContainer(swiftContainerName);
} else {
// This is a new object being created.
// changed from swiftendpoint
this.swiftContainer = account.getContainer(swiftFolderPath);
}
if (!this.swiftContainer.exists()) {
if (writeAccess) {
// creates a private data container
swiftContainer.create();
// try {
// //creates a public data container
// this.swiftContainer.makePublic();
// }
// catch (Exception e){
// //e.printStackTrace();
// logger.warning("Caught exception "+e.getClass()+" while creating a swift container (it's likely not fatal!)");
// }
} else {
// We are trying to read an existing object, but its container does not exist!
throw new IOException("SwiftAccessIO: container " + swiftContainerName + " does not exist.");
}
}
fileObject = this.swiftContainer.getObject(swiftFileName);
// If this is an object for a primary file (not an aux object), we also set the file download url here:
if (auxItemTag == null && dvObject instanceof DataFile) {
setRemoteUrl(getSwiftFileURI(fileObject));
if (!this.isWriteAccess && !this.getDataFile().isIngestInProgress()) {
// otherwise this gets called a bunch on upload
setTemporarySwiftUrl(generateTemporarySwiftUrl(swiftEndPoint, swiftContainerName, swiftFileName, TEMP_URL_EXPIRES));
setTempUrlSignature(generateTempUrlSignature(swiftEndPoint, swiftContainerName, swiftFileName, TEMP_URL_EXPIRES));
setTempUrlExpiry(generateTempUrlExpiry(TEMP_URL_EXPIRES, System.currentTimeMillis()));
}
setSwiftFileName(swiftFileName);
logger.fine(getRemoteUrl() + " success; write mode: " + writeAccess);
} else {
logger.fine("sucessfully opened AUX object " + auxItemTag + " , write mode: " + writeAccess);
}
if (!writeAccess && !fileObject.exists()) {
throw new FileNotFoundException("SwiftAccessIO: DvObject " + swiftFileName + " does not exist (Dataverse dvObject id: " + dvObject.getId() + ")");
}
return fileObject;
}
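For reference, the swift://endpoint:container:object layout validated above can be isolated into a small parser. This is a hypothetical helper for illustration, not a method of SwiftAccessIO, and it covers only the three-token (file) form; datasets use the two-token form handled above:
// Hypothetical helper: splits a swift storage identifier into its
// { endpoint, container, objectName } tokens, mirroring the checks above.
static String[] parseSwiftStorageIdentifier(String storageIdentifier) throws IOException {
    if (storageIdentifier == null || !storageIdentifier.startsWith("swift://")) {
        throw new IOException("Not a swift storage identifier: " + storageIdentifier);
    }
    String[] tokens = storageIdentifier.substring("swift://".length()).split(":", 3);
    if (tokens.length != 3 || tokens[0].isEmpty() || tokens[1].isEmpty() || tokens[2].isEmpty()) {
        throw new IOException("SwiftAccessIO: invalid swift storage token: " + storageIdentifier);
    }
    return tokens;
}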
use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.
the class DatasetUtil method attemptToAutomaticallySelectThumbnailFromDataFiles.
/**
* Pass an optional datasetVersion to control which version's files are
* checked; if null, the latest version of the dataset is used.
*
* @param dataset the dataset to select a thumbnail for
* @param datasetVersion the version whose files are checked (may be null)
* @return the first DataFile that supports thumbnails and has one available, or null if none is found
*/
public static DataFile attemptToAutomaticallySelectThumbnailFromDataFiles(Dataset dataset, DatasetVersion datasetVersion) {
if (dataset == null) {
return null;
}
if (dataset.isUseGenericThumbnail()) {
logger.fine("Bypassing logic to find a thumbnail because a generic icon for the dataset is desired.");
return null;
}
if (datasetVersion == null) {
logger.fine("getting latest version of dataset");
datasetVersion = dataset.getLatestVersion();
}
for (FileMetadata fmd : datasetVersion.getFileMetadatas()) {
DataFile testFile = fmd.getDataFile();
if (FileUtil.isThumbnailSupported(testFile) && ImageThumbConverter.isThumbnailAvailable(testFile, ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE)) {
return testFile;
}
}
logger.fine("In attemptToAutomaticallySelectThumbnailFromDataFiles and interated through all the files but couldn't find a thumbnail.");
return null;
}
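A hedged usage sketch: a typical call site tries the automatic selection and falls back to the generic icon when null comes back. The dataset and logger variables here are assumed to be in scope:
// Sketch of a typical call site; a null result means no file in the
// version has a usable thumbnail, so the generic icon should be used.
DataFile thumbnailFile = DatasetUtil.attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null);
if (thumbnailFile != null) {
    dataset.setThumbnailFile(thumbnailFile);
} else {
    logger.fine("No usable thumbnail found; falling back to the generic dataset icon.");
}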
use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.
the class DestroyDatasetCommand method executeImpl.
@Override
protected void executeImpl(CommandContext ctxt) throws CommandException {
// first check if dataset is released, and if so, if user is a superuser
if (doomed.isReleased() && (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser())) {
throw new PermissionException("Destroy can only be called by superusers.", this, Collections.singleton(Permission.DeleteDatasetDraft), doomed);
}
// If there is a dedicated thumbnail DataFile, it needs to be reset
// explicitly, or we'll get a constraint violation when deleting:
doomed.setThumbnailFile(null);
final Dataset managedDoomed = ctxt.em().merge(doomed);
List<String> datasetAndFileSolrIdsToDelete = new ArrayList<>();
// Files need to be iterated through and removed 'by hand', to avoid
// optimistic lock issues... (plus the physical files need to be
// deleted too!)
Iterator<DataFile> dfIt = doomed.getFiles().iterator();
while (dfIt.hasNext()) {
DataFile df = dfIt.next();
// Gather potential Solr IDs of files. As of this writing deaccessioned files are never indexed.
String solrIdOfPublishedFile = IndexServiceBean.solrDocIdentifierFile + df.getId();
datasetAndFileSolrIdsToDelete.add(solrIdOfPublishedFile);
String solrIdOfDraftFile = IndexServiceBean.solrDocIdentifierFile + df.getId() + IndexServiceBean.draftSuffix;
datasetAndFileSolrIdsToDelete.add(solrIdOfDraftFile);
ctxt.engine().submit(new DeleteDataFileCommand(df, getRequest(), true));
dfIt.remove();
}
// also, lets delete the uploaded thumbnails!
deleteDatasetLogo(doomed);
// ASSIGNMENTS
for (RoleAssignment ra : ctxt.roles().directRoleAssignments(doomed)) {
ctxt.em().remove(ra);
}
// ROLES
for (DataverseRole ra : ctxt.roles().findByOwnerId(doomed.getId())) {
ctxt.em().remove(ra);
}
IdServiceBean idServiceBean = IdServiceBean.getBean(ctxt);
try {
if (idServiceBean.alreadyExists(doomed)) {
idServiceBean.deleteIdentifier(doomed);
}
} catch (Exception e) {
logger.log(Level.WARNING, "Identifier deletion was not successful: {0}", e.getMessage());
}
Dataverse toReIndex = managedDoomed.getOwner();
// Finally, remove the dataset itself:
ctxt.em().remove(managedDoomed);
// add potential Solr IDs of datasets to list for deletion
String solrIdOfPublishedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId();
datasetAndFileSolrIdsToDelete.add(solrIdOfPublishedDatasetVersion);
String solrIdOfDraftDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId() + IndexServiceBean.draftSuffix;
datasetAndFileSolrIdsToDelete.add(solrIdOfDraftDatasetVersion);
String solrIdOfDeaccessionedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId() + IndexServiceBean.deaccessionedSuffix;
datasetAndFileSolrIdsToDelete.add(solrIdOfDeaccessionedDatasetVersion);
IndexResponse resultOfSolrDeletionAttempt = ctxt.solrIndex().deleteMultipleSolrIds(datasetAndFileSolrIdsToDelete);
logger.log(Level.FINE, "Result of attempt to delete dataset and file IDs from the search index: {0}", resultOfSolrDeletionAttempt.getMessage());
ctxt.index().indexDataverse(toReIndex);
}
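The Solr document IDs collected above follow a simple base-plus-suffix pattern. A minimal sketch of that pattern as a standalone helper; the method name is hypothetical, and it assumes the same IndexServiceBean constants plus java.util imports:
// Hypothetical helper illustrating the base-plus-suffix Solr ID pattern
// the command above uses for datasets.
private static List<String> datasetSolrIdsToDelete(Long datasetId) {
    String base = IndexServiceBean.solrDocIdentifierDataset + datasetId;
    return Arrays.asList(
        base, // published version
        base + IndexServiceBean.draftSuffix, // draft version
        base + IndexServiceBean.deaccessionedSuffix); // deaccessioned version
}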
use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.
the class IngestServiceBean method addFiles.
// This method tries to permanently store the files on the filesystem.
// It should be called before we attempt to permanently save the files in
// the database by calling the Save command on the dataset and/or version.
// TODO: rename the method to finalizeFiles(), or something like that?
public void addFiles(DatasetVersion version, List<DataFile> newFiles) {
if (newFiles != null && newFiles.size() > 0) {
// final check for duplicate file names;
// we tried to make the file names unique on upload, but then
// the user may have edited them on the "add files" page, and
// renamed FOOBAR-1.txt back to FOOBAR.txt...
IngestUtil.checkForDuplicateFileNamesFinal(version, newFiles);
Dataset dataset = version.getDataset();
for (DataFile dataFile : newFiles) {
String tempFileLocation = FileUtil.getFilesTempDirectory() + "/" + dataFile.getStorageIdentifier();
// These are all brand new files, so they should all have
// one filemetadata total. -- L.A.
FileMetadata fileMetadata = dataFile.getFileMetadatas().get(0);
String fileName = fileMetadata.getLabel();
// Attach the file to the dataset and the version, if this hasn't been done yet:
if (dataFile.getOwner() == null) {
dataFile.setOwner(dataset);
version.getFileMetadatas().add(dataFile.getFileMetadata());
dataFile.getFileMetadata().setDatasetVersion(version);
dataset.getFiles().add(dataFile);
}
boolean metadataExtracted = false;
if (FileUtil.ingestableAsTabular(dataFile)) {
/*
* Note that we don't try to ingest the file right away -
* instead we mark it as "scheduled for ingest", then at
* the end of the save process it will be queued for async
* ingest in the background. In the meantime, the file will
* be saved as a regular, non-tabular file, and appear as
* such to the user, until the ingest job is finished by
* the Ingest Service.
*/
dataFile.SetIngestScheduled();
} else if (fileMetadataExtractable(dataFile)) {
try {
// FITS is the only type supported for metadata
// extraction, as of now. -- L.A. 4.0
dataFile.setContentType("application/fits");
metadataExtracted = extractMetadata(tempFileLocation, dataFile, version);
} catch (IOException mex) {
logger.severe("Caught exception trying to extract indexable metadata from file " + fileName + ", " + mex.getMessage());
}
if (metadataExtracted) {
logger.fine("Successfully extracted indexable metadata from file " + fileName);
} else {
logger.fine("Failed to extract indexable metadata from file " + fileName);
}
}
// Try to save the file in its permanent location:
String storageId = dataFile.getStorageIdentifier().replaceFirst("^tmp://", "");
Path tempLocationPath = Paths.get(FileUtil.getFilesTempDirectory() + "/" + storageId);
WritableByteChannel writeChannel = null;
FileChannel readChannel = null;
boolean localFile = false;
boolean savedSuccess = false;
StorageIO<DataFile> dataAccess = null;
try {
logger.fine("Attempting to create a new storageIO object for " + storageId);
dataAccess = DataAccess.createNewStorageIO(dataFile, storageId);
if (dataAccess.isLocalFile()) {
localFile = true;
}
logger.fine("Successfully created a new storageIO object.");
/*
This commented-out code demonstrates how to copy bytes
from a local InputStream (or a readChannel) into the
writable byte channel of a Dataverse DataAccessIO object:
*/
/*
storageIO.open(DataAccessOption.WRITE_ACCESS);
writeChannel = storageIO.getWriteChannel();
readChannel = new FileInputStream(tempLocationPath.toFile()).getChannel();
long bytesPerIteration = 16 * 1024; // 16K bytes
long start = 0;
while ( start < readChannel.size() ) {
readChannel.transferTo(start, bytesPerIteration, writeChannel);
start += bytesPerIteration;
}
*/
/*
But it's easier to use this convenience method from the
DataAccessIO. If the underlying storage method for this
file is the local filesystem, the DataAccessIO will simply
copy the file using Files.copy, like this:
Files.copy(tempLocationPath, storageIO.getFileSystemLocation(), StandardCopyOption.REPLACE_EXISTING);
*/
dataAccess.savePath(tempLocationPath);
// Set the filesize in bytes:
dataFile.setFilesize(dataAccess.getSize());
savedSuccess = true;
logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel());
} catch (IOException ioex) {
logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")");
} finally {
if (readChannel != null) {
try {
readChannel.close();
} catch (IOException e) {
// Ignore; failing to close the read channel here is non-fatal.
}
}
if (writeChannel != null) {
try {
writeChannel.close();
} catch (IOException e) {
// Ignore; failing to close the write channel here is non-fatal.
}
}
}
// Since we may have already spent some CPU cycles scaling down image thumbnails,
// we may as well save them, by moving these generated images to the permanent
// dataset directory. We should also remember to delete any such files in the
// temp directory:
List<Path> generatedTempFiles = listGeneratedTempFiles(Paths.get(FileUtil.getFilesTempDirectory()), storageId);
if (generatedTempFiles != null) {
for (Path generated : generatedTempFiles) {
if (savedSuccess) {
// && localFile) {
logger.fine("(Will also try to permanently save generated thumbnail file " + generated.toString() + ")");
try {
// Files.copy(generated, Paths.get(dataset.getFileSystemDirectory().toString(), generated.getFileName().toString()));
int i = generated.toString().lastIndexOf("thumb");
if (i > 1) {
String extensionTag = generated.toString().substring(i);
dataAccess.savePathAsAux(generated, extensionTag);
logger.fine("Saved generated thumbnail as aux object. \"preview available\" status: " + dataFile.isPreviewImageAvailable());
} else {
logger.warning("Generated thumbnail file name does not match the expected pattern: " + generated.toString());
}
} catch (IOException ioex) {
logger.warning("Failed to save generated file " + generated.toString());
}
try {
Files.delete(generated);
} catch (IOException ioex) {
logger.warning("Failed to delete generated file " + generated.toString());
}
}
}
}
try {
logger.fine("Will attempt to delete the temp file " + tempLocationPath.toString());
Files.delete(tempLocationPath);
} catch (IOException ex) {
// (non-fatal - it's just a temp file.)
logger.warning("Failed to delete temp file " + tempLocationPath.toString());
}
// Any necessary post-processing:
// performPostProcessingTasks(dataFile);
}
logger.fine("Done! Finished saving new files in permanent storage.");
}
}
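If the manual channel-to-channel copy from the commented-out block is ever needed, a self-contained sketch of that loop follows. It assumes a StorageIO<DataFile> named storageIO and a Path named tempLocationPath, as in the comments above, and advances the position by transferTo's actual return value rather than a fixed step:
// Minimal sketch of the manual copy described in the commented-out block;
// java.nio imports (FileChannel, StandardOpenOption) are assumed.
storageIO.open(DataAccessOption.WRITE_ACCESS);
try (FileChannel readChannel = FileChannel.open(tempLocationPath, StandardOpenOption.READ)) {
    WritableByteChannel writeChannel = storageIO.getWriteChannel();
    long bytesPerIteration = 16 * 1024; // 16K bytes per transfer
    long position = 0;
    while (position < readChannel.size()) {
        position += readChannel.transferTo(position, bytesPerIteration, writeChannel);
    }
}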