use of edu.harvard.iq.dataverse.Dataset in project dataverse by IQSS.
the class SwiftAccessIO method initializeSwiftFileObject.
private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxItemTag) throws IOException {
String swiftEndPoint = null;
String swiftContainerName = null;
String swiftFileName = null;
StoredObject fileObject;
List<String> auxFiles = null;
String storageIdentifier = dvObject.getStorageIdentifier();
if (dvObject instanceof DataFile) {
Dataset owner = this.getDataFile().getOwner();
if (storageIdentifier.startsWith("swift://")) {
// This is a call on an already existing swift object.
String[] swiftStorageTokens = storageIdentifier.substring(8).split(":", 3);
if (swiftStorageTokens.length != 3) {
// bad storage identifier
throw new IOException("SwiftAccessIO: invalid swift storage token: " + storageIdentifier);
}
swiftEndPoint = swiftStorageTokens[0];
swiftContainerName = swiftStorageTokens[1];
swiftFileName = swiftStorageTokens[2];
if (StringUtil.isEmpty(swiftEndPoint) || StringUtil.isEmpty(swiftContainerName) || StringUtil.isEmpty(swiftFileName)) {
// all three of these things need to be specified for this to be a valid storage identifier.
throw new IOException("SwiftAccessIO: invalid swift storage token: " + storageIdentifier);
}
if (auxItemTag != null) {
swiftFileName = swiftFileName.concat("." + auxItemTag);
}
} else if (this.isReadAccess) {
// An attempt to call the Swift driver, in read mode, on an object that is not stored on Swift!
throw new IOException("IO driver mismatch: SwiftAccessIO called on a non-swift stored object.");
} else if (this.isWriteAccess) {
Properties p = getSwiftProperties();
swiftEndPoint = p.getProperty("swift.default.endpoint");
// swiftFolderPath = dataFile.getOwner().getDisplayName();
String swiftFolderPathSeparator = "-";
String authorityNoSlashes = owner.getAuthority().replace(owner.getDoiSeparator(), swiftFolderPathSeparator);
swiftFolderPath = owner.getProtocol() + swiftFolderPathSeparator + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + swiftFolderPathSeparator + owner.getIdentifier();
swiftFileName = storageIdentifier;
// setSwiftContainerName(swiftFolderPath);
// swiftFileName = dataFile.getDisplayName();
// Storage Identifier is now updated after the object is uploaded on Swift.
dvObject.setStorageIdentifier("swift://" + swiftEndPoint + ":" + swiftFolderPath + ":" + swiftFileName);
} else {
throw new IOException("SwiftAccessIO: unknown access mode.");
}
} else if (dvObject instanceof Dataset) {
Dataset dataset = this.getDataset();
if (storageIdentifier.startsWith("swift://")) {
// This is a call on an already existing swift object.
// TODO: determine how the storage identifier will give us info
String[] swiftStorageTokens = storageIdentifier.substring(8).split(":", 3);
// the number of tokens should be two, because there is no main file
if (swiftStorageTokens.length != 2) {
// bad storage identifier
throw new IOException("SwiftAccessIO: invalid swift storage token: " + storageIdentifier);
}
swiftEndPoint = swiftStorageTokens[0];
swiftContainerName = swiftStorageTokens[1];
// We will not have a file name, just an aux tag
if (auxItemTag != null) {
swiftFileName = auxItemTag;
} else {
throw new IOException("Dataset related auxillary files require an auxItemTag");
}
if (StringUtil.isEmpty(swiftEndPoint) || StringUtil.isEmpty(swiftContainerName) || StringUtil.isEmpty(swiftFileName)) {
// all three of these things need to be specified for this to be a valid storage identifier.
throw new IOException("SwiftAccessIO: invalid swift storage token: " + storageIdentifier);
}
} else if (this.isReadAccess) {
// An attempt to call the Swift driver, in read mode, on an object that is not stored on Swift!
throw new IOException("IO driver mismatch: SwiftAccessIO called on a non-swift stored object.");
} else if (this.isWriteAccess) {
Properties p = getSwiftProperties();
swiftEndPoint = p.getProperty("swift.default.endpoint");
String swiftFolderPathSeparator = "-";
String authorityNoSlashes = dataset.getAuthority().replace(dataset.getDoiSeparator(), swiftFolderPathSeparator);
swiftFolderPath = dataset.getProtocol() + swiftFolderPathSeparator + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + swiftFolderPathSeparator + dataset.getIdentifier();
swiftFileName = auxItemTag;
dvObject.setStorageIdentifier("swift://" + swiftEndPoint + ":" + swiftFolderPath);
} else {
throw new IOException("SwiftAccessIO: unknown access mode.");
}
} else {
// for future scope, if dataverses are ever stored in swift storage containers
throw new FileNotFoundException("Error initializing swift object");
}
if (this.account == null) {
account = authenticateWithSwift(swiftEndPoint);
}
/*
The container created is the swiftEndPoint concatenated with the
swiftContainerName property. Creating containers with certain names
throws an 'Unable to create container' error on OpenStack.
Any datafile with an http://rdgw storage identifier, i.e. one that is
already present on the Object Store service endpoint, only needs to look
up its container using swiftContainerName, which is the concatenated name.
In the future, a container could be created for the endpoint, with an
Object Store pseudo-folder for every other swiftContainerName; this is
not yet provided by the joss Java Swift library.
*/
if (storageIdentifier.startsWith("swift://")) {
// An existing swift object; the container must already exist as well.
this.swiftContainer = account.getContainer(swiftContainerName);
} else {
// This is a new object being created.
// changed from swiftendpoint
this.swiftContainer = account.getContainer(swiftFolderPath);
}
if (!this.swiftContainer.exists()) {
if (writeAccess) {
// creates a private data container
swiftContainer.create();
// try {
// //creates a public data container
// this.swiftContainer.makePublic();
// }
// catch (Exception e){
// //e.printStackTrace();
// logger.warning("Caught exception "+e.getClass()+" while creating a swift container (it's likely not fatal!)");
// }
} else {
// we are trying to read an existing object, but its container does not exist!
throw new IOException("SwiftAccessIO: container " + swiftContainerName + " does not exist.");
}
}
fileObject = this.swiftContainer.getObject(swiftFileName);
// If this is the main physical file (rather than an auxiliary object for a primary file), we also set the file download url here:
if (auxItemTag == null && dvObject instanceof DataFile) {
setRemoteUrl(getSwiftFileURI(fileObject));
if (!this.isWriteAccess && !this.getDataFile().isIngestInProgress()) {
// otherwise this gets called a bunch on upload
setTemporarySwiftUrl(generateTemporarySwiftUrl(swiftEndPoint, swiftContainerName, swiftFileName, TEMP_URL_EXPIRES));
setTempUrlSignature(generateTempUrlSignature(swiftEndPoint, swiftContainerName, swiftFileName, TEMP_URL_EXPIRES));
setTempUrlExpiry(generateTempUrlExpiry(TEMP_URL_EXPIRES, System.currentTimeMillis()));
}
setSwiftFileName(swiftFileName);
logger.fine(getRemoteUrl() + " success; write mode: " + writeAccess);
} else {
logger.fine("sucessfully opened AUX object " + auxItemTag + " , write mode: " + writeAccess);
}
if (!writeAccess && !fileObject.exists()) {
throw new FileNotFoundException("SwiftAccessIO: DvObject " + swiftFileName + " does not exist (Dataverse dvObject id: " + dvObject.getId());
}
auxFiles = null;
return fileObject;
}
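For reference, the storage-identifier convention handled above can be shown in isolation. The following is a minimal, self-contained sketch (the class name, method name, and demo values are hypothetical, not part of Dataverse) of how a swift://endpoint:container:object identifier splits into its three tokens, mirroring the substring(8) / split(":", 3) logic in the method:

// Standalone sketch of the "swift://endpoint:container:object" convention used above.
// The demo values are made up; only the parsing rule (strip the "swift://" prefix,
// then split into at most three non-empty tokens) comes from the method itself.
public class SwiftIdentifierDemo {

    static String[] parse(String storageIdentifier) {
        if (!storageIdentifier.startsWith("swift://")) {
            throw new IllegalArgumentException("not a swift identifier: " + storageIdentifier);
        }
        // Split into at most three tokens: endpoint, container, object name.
        String[] tokens = storageIdentifier.substring("swift://".length()).split(":", 3);
        if (tokens.length != 3 || tokens[0].isEmpty() || tokens[1].isEmpty() || tokens[2].isEmpty()) {
            throw new IllegalArgumentException("invalid swift storage token: " + storageIdentifier);
        }
        return tokens;
    }

    public static void main(String[] args) {
        String[] parts = parse("swift://endpoint1:doi-10-5072-FK2-ABCDEF:169f1b2c3d4-5e6f7a8b9c0d");
        System.out.println("endpoint=" + parts[0] + ", container=" + parts[1] + ", object=" + parts[2]);
    }
}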
use of edu.harvard.iq.dataverse.Dataset in project dataverse by IQSS.
the class DestroyDatasetCommand method executeImpl.
@Override
protected void executeImpl(CommandContext ctxt) throws CommandException {
// first check if dataset is released, and if so, if user is a superuser
if (doomed.isReleased() && (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser())) {
throw new PermissionException("Destroy can only be called by superusers.", this, Collections.singleton(Permission.DeleteDatasetDraft), doomed);
}
// If there is a dedicated thumbnail DataFile, it needs to be reset
// explicitly, or we'll get a constraint violation when deleting:
doomed.setThumbnailFile(null);
final Dataset managedDoomed = ctxt.em().merge(doomed);
List<String> datasetAndFileSolrIdsToDelete = new ArrayList<>();
// files: we need to iterate through them and remove them 'by hand' to avoid
// optimistic lock issues... (plus the physical files need to be
// deleted too!)
Iterator<DataFile> dfIt = doomed.getFiles().iterator();
while (dfIt.hasNext()) {
DataFile df = dfIt.next();
// Gather potential Solr IDs of files. As of this writing deaccessioned files are never indexed.
String solrIdOfPublishedFile = IndexServiceBean.solrDocIdentifierFile + df.getId();
datasetAndFileSolrIdsToDelete.add(solrIdOfPublishedFile);
String solrIdOfDraftFile = IndexServiceBean.solrDocIdentifierFile + df.getId() + IndexServiceBean.draftSuffix;
datasetAndFileSolrIdsToDelete.add(solrIdOfDraftFile);
ctxt.engine().submit(new DeleteDataFileCommand(df, getRequest(), true));
dfIt.remove();
}
// also, let's delete the uploaded thumbnails!
deleteDatasetLogo(doomed);
// ASSIGNMENTS
for (RoleAssignment ra : ctxt.roles().directRoleAssignments(doomed)) {
ctxt.em().remove(ra);
}
// ROLES
for (DataverseRole ra : ctxt.roles().findByOwnerId(doomed.getId())) {
ctxt.em().remove(ra);
}
IdServiceBean idServiceBean = IdServiceBean.getBean(ctxt);
try {
if (idServiceBean.alreadyExists(doomed)) {
idServiceBean.deleteIdentifier(doomed);
}
} catch (Exception e) {
logger.log(Level.WARNING, "Identifier deletion was not successfull:", e.getMessage());
}
Dataverse toReIndex = managedDoomed.getOwner();
// finally, remove the dataset itself
ctxt.em().remove(managedDoomed);
// add potential Solr IDs of datasets to list for deletion
String solrIdOfPublishedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId();
datasetAndFileSolrIdsToDelete.add(solrIdOfPublishedDatasetVersion);
String solrIdOfDraftDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId() + IndexServiceBean.draftSuffix;
datasetAndFileSolrIdsToDelete.add(solrIdOfDraftDatasetVersion);
String solrIdOfDeaccessionedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId() + IndexServiceBean.deaccessionedSuffix;
datasetAndFileSolrIdsToDelete.add(solrIdOfDeaccessionedDatasetVersion);
IndexResponse resultOfSolrDeletionAttempt = ctxt.solrIndex().deleteMultipleSolrIds(datasetAndFileSolrIdsToDelete);
logger.log(Level.FINE, "Result of attempt to delete dataset and file IDs from the search index: {0}", resultOfSolrDeletionAttempt.getMessage());
ctxt.index().indexDataverse(toReIndex);
}
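The command above assembles the candidate Solr document IDs by concatenating fixed prefixes and suffixes with the entity's database ID. Below is a hedged, standalone sketch of that naming convention; the helper class is hypothetical, and the literal prefix/suffix values are assumptions standing in for the IndexServiceBean constants referenced above:

import java.util.ArrayList;
import java.util.List;

// Standalone sketch of the Solr ID naming convention used above. The literal
// prefix/suffix values below are assumed for illustration; in Dataverse they
// come from IndexServiceBean constants.
public class SolrIdDemo {
    static final String DATASET_PREFIX = "dataset_";             // assumed value of solrDocIdentifierDataset
    static final String FILE_PREFIX = "datafile_";                // assumed value of solrDocIdentifierFile
    static final String DRAFT_SUFFIX = "_draft";                  // assumed value of draftSuffix
    static final String DEACCESSIONED_SUFFIX = "_deaccessioned";  // assumed value of deaccessionedSuffix

    static List<String> idsToDeleteForDataset(long datasetId, List<Long> fileIds) {
        List<String> ids = new ArrayList<>();
        for (Long fileId : fileIds) {
            ids.add(FILE_PREFIX + fileId);                        // published file doc
            ids.add(FILE_PREFIX + fileId + DRAFT_SUFFIX);         // draft file doc
        }
        ids.add(DATASET_PREFIX + datasetId);                          // published dataset version
        ids.add(DATASET_PREFIX + datasetId + DRAFT_SUFFIX);           // draft dataset version
        ids.add(DATASET_PREFIX + datasetId + DEACCESSIONED_SUFFIX);   // deaccessioned dataset version
        return ids;
    }

    public static void main(String[] args) {
        System.out.println(idsToDeleteForDataset(42L, List.of(7L, 8L)));
    }
}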
use of edu.harvard.iq.dataverse.Dataset in project dataverse by IQSS.
the class IngestServiceBean method addFiles.
// This method tries to permanently store the files on the filesystem.
// It should be called before we attempt to permanently save the files in
// the database by calling the Save command on the dataset and/or version.
// TODO: rename the method finalizeFiles()? or something like that?
public void addFiles(DatasetVersion version, List<DataFile> newFiles) {
if (newFiles != null && newFiles.size() > 0) {
// final check for duplicate file names;
// we tried to make the file names unique on upload, but then
// the user may have edited them on the "add files" page, and
// renamed FOOBAR-1.txt back to FOOBAR.txt...
IngestUtil.checkForDuplicateFileNamesFinal(version, newFiles);
Dataset dataset = version.getDataset();
for (DataFile dataFile : newFiles) {
String tempFileLocation = FileUtil.getFilesTempDirectory() + "/" + dataFile.getStorageIdentifier();
// These are all brand new files, so they should all have
// one filemetadata total. -- L.A.
FileMetadata fileMetadata = dataFile.getFileMetadatas().get(0);
String fileName = fileMetadata.getLabel();
// attach the file to the dataset (and the dataset version), if that hasn't been done yet:
if (dataFile.getOwner() == null) {
dataFile.setOwner(dataset);
version.getFileMetadatas().add(dataFile.getFileMetadata());
dataFile.getFileMetadata().setDatasetVersion(version);
dataset.getFiles().add(dataFile);
}
boolean metadataExtracted = false;
if (FileUtil.ingestableAsTabular(dataFile)) {
/*
* Note that we don't try to ingest the file right away -
* instead we mark it as "scheduled for ingest", then at
* the end of the save process it will be queued for async.
* ingest in the background. In the meantime, the file
* will be ingested as a regular, non-tabular file, and
* appear as such to the user, until the ingest job is
* finished with the Ingest Service.
*/
dataFile.SetIngestScheduled();
} else if (fileMetadataExtractable(dataFile)) {
try {
// FITS is the only type supported for metadata
// extraction, as of now. -- L.A. 4.0
dataFile.setContentType("application/fits");
metadataExtracted = extractMetadata(tempFileLocation, dataFile, version);
} catch (IOException mex) {
logger.severe("Caught exception trying to extract indexable metadata from file " + fileName + ", " + mex.getMessage());
}
if (metadataExtracted) {
logger.fine("Successfully extracted indexable metadata from file " + fileName);
} else {
logger.fine("Failed to extract indexable metadata from file " + fileName);
}
}
// Try to save the file in its permanent location:
String storageId = dataFile.getStorageIdentifier().replaceFirst("^tmp://", "");
Path tempLocationPath = Paths.get(FileUtil.getFilesTempDirectory() + "/" + storageId);
WritableByteChannel writeChannel = null;
FileChannel readChannel = null;
boolean localFile = false;
boolean savedSuccess = false;
StorageIO<DataFile> dataAccess = null;
try {
logger.fine("Attempting to create a new storageIO object for " + storageId);
dataAccess = DataAccess.createNewStorageIO(dataFile, storageId);
if (dataAccess.isLocalFile()) {
localFile = true;
}
logger.fine("Successfully created a new storageIO object.");
/*
This commented-out code demonstrates how to copy bytes
from a local InputStream (or a readChannel) into the
writable byte channel of a Dataverse DataAccessIO object:
*/
/*
storageIO.open(DataAccessOption.WRITE_ACCESS);
writeChannel = storageIO.getWriteChannel();
readChannel = new FileInputStream(tempLocationPath.toFile()).getChannel();
long bytesPerIteration = 16 * 1024; // 16K bytes
long start = 0;
while ( start < readChannel.size() ) {
readChannel.transferTo(start, bytesPerIteration, writeChannel);
start += bytesPerIteration;
}
*/
/*
But it's easier to use this convenience method from the
DataAccessIO:
(if the underlying storage method for this file is
local filesystem, the DataAccessIO will simply copy
the file using Files.copy, like this:
Files.copy(tempLocationPath, storageIO.getFileSystemLocation(), StandardCopyOption.REPLACE_EXISTING);
*/
dataAccess.savePath(tempLocationPath);
// Set the file size in bytes:
dataFile.setFilesize(dataAccess.getSize());
savedSuccess = true;
logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel());
} catch (IOException ioex) {
logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")");
} finally {
if (readChannel != null) {
try {
readChannel.close();
} catch (IOException e) {
}
}
if (writeChannel != null) {
try {
writeChannel.close();
} catch (IOException e) {
}
}
}
// Since we may have already spent some CPU cycles scaling down image thumbnails,
// we may as well save them, by moving these generated images to the permanent
// dataset directory. We should also remember to delete any such files in the
// temp directory:
List<Path> generatedTempFiles = listGeneratedTempFiles(Paths.get(FileUtil.getFilesTempDirectory()), storageId);
if (generatedTempFiles != null) {
for (Path generated : generatedTempFiles) {
if (savedSuccess) {
// && localFile) {
logger.fine("(Will also try to permanently save generated thumbnail file " + generated.toString() + ")");
try {
// Files.copy(generated, Paths.get(dataset.getFileSystemDirectory().toString(), generated.getFileName().toString()));
int i = generated.toString().lastIndexOf("thumb");
if (i > 1) {
String extensionTag = generated.toString().substring(i);
dataAccess.savePathAsAux(generated, extensionTag);
logger.fine("Saved generated thumbnail as aux object. \"preview available\" status: " + dataFile.isPreviewImageAvailable());
} else {
logger.warning("Generated thumbnail file name does not match the expected pattern: " + generated.toString());
}
} catch (IOException ioex) {
logger.warning("Failed to save generated file " + generated.toString());
}
try {
Files.delete(generated);
} catch (IOException ioex) {
logger.warning("Failed to delete generated file " + generated.toString());
}
}
}
}
try {
logger.fine("Will attempt to delete the temp file " + tempLocationPath.toString());
Files.delete(tempLocationPath);
} catch (IOException ex) {
// (non-fatal - it's just a temp file.)
logger.warning("Failed to delete temp file " + tempLocationPath.toString());
}
// Any necessary post-processing:
// performPostProcessingTasks(dataFile);
}
logger.fine("Done! Finished saving new files in permanent storage.");
}
}
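The commented-out block inside addFiles describes the lower-level alternative to dataAccess.savePath(tempLocationPath): copying bytes from a read channel into the storage object's write channel in fixed-size chunks. The following standalone sketch reproduces that pattern with plain NIO; both ends are ordinary files and the paths are hypothetical, whereas in the real code the write channel would come from storageIO.getWriteChannel():

import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.channels.WritableByteChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;

// Standalone sketch of the channel-to-channel copy described in the commented-out
// block above. Both ends are plain files purely for illustration.
public class ChannelCopyDemo {
    public static void main(String[] args) throws IOException {
        Path source = Paths.get("/tmp/dataverse-demo-source.bin");       // hypothetical temp file
        Path destination = Paths.get("/tmp/dataverse-demo-dest.bin");    // hypothetical permanent location
        Files.write(source, new byte[64 * 1024]);                        // create some demo content

        try (FileChannel readChannel = FileChannel.open(source, StandardOpenOption.READ);
             WritableByteChannel writeChannel = FileChannel.open(destination,
                     StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING)) {
            long bytesPerIteration = 16 * 1024; // copy in 16K chunks, as in the commented-out example
            long start = 0;
            while (start < readChannel.size()) {
                start += readChannel.transferTo(start, bytesPerIteration, writeChannel);
            }
        }
        System.out.println("Copied " + Files.size(destination) + " bytes.");
    }
}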
use of edu.harvard.iq.dataverse.Dataset in project dataverse by IQSS.
the class HarvesterServiceBean method deleteHarvestedDataset.
private void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Logger hdLogger) {
// Purge all the SOLR documents associated with this dataset from the
// index server:
indexService.deleteHarvestedDocuments(dataset);
try {
// Files belonging to harvested datasets are removed directly via the entity manager, without submitting a DeleteFileCommand on them.
for (DataFile harvestedFile : dataset.getFiles()) {
DataFile merged = em.merge(harvestedFile);
em.remove(merged);
harvestedFile = null;
}
dataset.setFiles(null);
Dataset merged = em.merge(dataset);
engineService.submit(new DeleteDatasetCommand(request, merged));
} catch (IllegalCommandException ex) {
// TODO: log the result
} catch (PermissionException ex) {
// TODO: log the result
} catch (CommandException ex) {
// TODO: log the result
}
// TODO: log the success result
}
use of edu.harvard.iq.dataverse.Dataset in project dataverse by IQSS.
the class OAIRecordServiceBean method updateOaiRecords.
public void updateOaiRecords(String setName, List<Long> datasetIds, Date updateTime, boolean doExport, Logger setUpdateLogger) {
// create Map of OaiRecords
List<OAIRecord> oaiRecords = findOaiRecordsBySetName(setName);
Map<String, OAIRecord> recordMap = new HashMap<>();
if (oaiRecords != null) {
for (OAIRecord record : oaiRecords) {
// look for duplicates here? delete?
recordMap.put(record.getGlobalId(), record);
}
} else {
setUpdateLogger.fine("Null returned - no records found.");
}
if (!recordMap.isEmpty()) {
setUpdateLogger.fine("Found " + recordMap.size() + " existing records");
} else {
setUpdateLogger.fine("No records in the set yet.");
}
if (datasetIds != null) {
for (Long datasetId : datasetIds) {
setUpdateLogger.fine("processing dataset id=" + datasetId);
Dataset dataset = datasetService.find(datasetId);
if (dataset == null) {
setUpdateLogger.fine("failed to find dataset!");
} else {
setUpdateLogger.fine("found dataset.");
// TODO: option to *force* export?
if (doExport) {
// TODO:
// Review this logic - specifically for handling of
// deaccessioned datasets. -- L.A. 4.5
// OK, it looks like we can't rely on .getPublicationDate() -
// as it is essentially the *first publication* date;
// and we are interested in the *last*
DatasetVersion releasedVersion = dataset.getReleasedVersion();
Date publicationDate = releasedVersion == null ? null : releasedVersion.getReleaseTime();
if (publicationDate != null && (dataset.getLastExportTime() == null || dataset.getLastExportTime().before(publicationDate))) {
setUpdateLogger.fine("Attempting to run export on dataset " + dataset.getGlobalId());
exportAllFormats(dataset);
}
}
setUpdateLogger.fine("\"last exported\" timestamp: " + dataset.getLastExportTime());
em.refresh(dataset);
setUpdateLogger.fine("\"last exported\" timestamp, after db refresh: " + dataset.getLastExportTime());
updateOaiRecordForDataset(dataset, setName, recordMap, setUpdateLogger);
}
}
}
// anything left in the map should be marked as removed!
markOaiRecordsAsRemoved(recordMap.values(), updateTime, setUpdateLogger);
}
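The export decision inside updateOaiRecords reduces to a simple predicate: re-export when a released version exists and the dataset has either never been exported, or was last exported before that version's release time. A minimal sketch of that check, with hypothetical names:

import java.util.Date;

// Standalone sketch of the "does this dataset need re-exporting?" check used in
// updateOaiRecords. Names are hypothetical; the logic mirrors the comparison of
// the released version's release time against the last export timestamp.
public class ExportCheckDemo {
    static boolean exportNeeded(Date releaseTime, Date lastExportTime) {
        if (releaseTime == null) {
            return false; // no released version, nothing to export
        }
        // never exported, or exported before the most recent release
        return lastExportTime == null || lastExportTime.before(releaseTime);
    }

    public static void main(String[] args) {
        Date released = new Date(1_600_000_000_000L);
        Date exported = new Date(1_500_000_000_000L);
        System.out.println(exportNeeded(released, exported)); // true: export is stale
        System.out.println(exportNeeded(released, null));     // true: never exported
        System.out.println(exportNeeded(null, exported));     // false: nothing released
    }
}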