Search in sources :

Example 41 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class AddReplaceFileHelper method setNewlyAddedFiles.

/**
 * Rebuilds {@code newlyAddedFiles} and {@code newlyAddedFileMetadatas} from
 * the dataset's edit version.
 *
 * For every file in {@code finalFileList}, the file metadatas of the edit
 * version are scanned; an entry matches when both the checksum value and the
 * storage identifier of its data file equal those of the file from the final
 * list. The matched data file (taken from the edit version) and its file
 * metadata are recorded. Per the original note, the goal is to obtain the
 * version of each newly added file that has an id set.
 *
 * Does nothing if an error has already been recorded ({@code hasError()}).
 *
 * TODO: This is inefficient/expensive.  Need to redo it in a sane way
 *      - e.g. Query to find
 *          (1) latest dataset version in draft
 *          (2) pick off files that are NOT released
 *          (3) iterate through only those files
 *      - or an alternate/better version
 *
 * @param datafiles not read by this method; the files that get matched come
 *                  from {@code finalFileList}
 */
private void setNewlyAddedFiles(List<DataFile> datafiles) {
    if (hasError()) {
        return;
    }
    // Start from empty lists on every call.
    newlyAddedFiles = new ArrayList<>();
    newlyAddedFileMetadatas = new ArrayList<>();
    // Nested loop, but the final file list is expected to hold only 1 to 4 files.
    List<FileMetadata> latestFileMetadatas = dataset.getEditVersion().getFileMetadatas();
    for (DataFile newlyAddedFile : finalFileList) {
        for (FileMetadata fm : latestFileMetadatas) {
            DataFile candidate = fm.getDataFile();
            // A match requires the same checksum value AND the same storage identifier.
            if (newlyAddedFile.getChecksumValue().equals(candidate.getChecksumValue())
                    && newlyAddedFile.getStorageIdentifier().equals(candidate.getStorageIdentifier())) {
                newlyAddedFiles.add(candidate);
                newlyAddedFileMetadatas.add(fm);
            }
        }
    }
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) FileMetadata(edu.harvard.iq.dataverse.FileMetadata)

Example 42 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class SwiftAccessIO method initializeSwiftFileObject.

/**
 * Works out where the current {@code dvObject} lives (or should live) in
 * Swift storage and returns a handle to the corresponding stored object.
 *
 * Two kinds of dvObject are supported:
 * <ul>
 *   <li>{@code DataFile}: an existing location has the form
 *       {@code swift://endpoint:container:fileName}; for a new file opened
 *       for writing, the location is built from the default endpoint and the
 *       owning dataset's protocol, authority and identifier, and the
 *       dvObject's storage identifier is updated accordingly.</li>
 *   <li>{@code Dataset}: an existing location has the form
 *       {@code swift://endpoint:container}, and the object name is the
 *       auxiliary tag; a new location is built the same way as for files.</li>
 * </ul>
 * As side effects this method authenticates with Swift if no account is set
 * yet, sets {@code swiftContainer}, and, for the primary object of a
 * DataFile, records the remote URL, the swift file name and (when not
 * writing and no ingest is in progress) the temporary URL data.
 *
 * @param writeAccess whether the caller intends to write: a missing container
 *                    is created when true, and a missing container or object
 *                    is reported as an error when false
 * @param auxItemTag  tag of the auxiliary object to open, or {@code null} for
 *                    the primary file; must be non-null when opening an
 *                    existing dataset-level location
 * @return the handle of the swift object
 * @throws IOException if the storage identifier is malformed, the access mode
 *                     does not fit the stored object, or the container is
 *                     missing on read; a {@code FileNotFoundException} is
 *                     thrown for unsupported dvObject types and for a missing
 *                     object on read
 */
private StoredObject initializeSwiftFileObject(boolean writeAccess, String auxItemTag) throws IOException {
    String swiftEndPoint = null;
    String swiftContainerName = null;
    String swiftFileName = null;
    StoredObject fileObject;
    // Captured before any update below, so later checks see the ORIGINAL identifier.
    String storageIdentifier = dvObject.getStorageIdentifier();
    if (dvObject instanceof DataFile) {
        Dataset owner = this.getDataFile().getOwner();
        if (storageIdentifier.startsWith("swift://")) {
            // This is a call on an already existing swift object.
            // Expected layout after the "swift://" prefix: endpoint:container:fileName
            String[] swiftStorageTokens = storageIdentifier.substring(8).split(":", 3);
            if (swiftStorageTokens.length != 3) {
                // bad storage identifier
                throw new IOException("SwiftAccessIO: invalid swift storage token: " + storageIdentifier);
            }
            swiftEndPoint = swiftStorageTokens[0];
            swiftContainerName = swiftStorageTokens[1];
            swiftFileName = swiftStorageTokens[2];
            if (StringUtil.isEmpty(swiftEndPoint) || StringUtil.isEmpty(swiftContainerName) || StringUtil.isEmpty(swiftFileName)) {
                // every component must be non-empty for the identifier to be usable
                throw new IOException("SwiftAccessIO: invalid swift storage token: " + storageIdentifier);
            }
            if (auxItemTag != null) {
                // Auxiliary objects are stored next to the main file as "<fileName>.<tag>".
                swiftFileName = swiftFileName.concat("." + auxItemTag);
            }
        } else if (this.isReadAccess) {
            // Read access was requested, but the identifier does not point to a swift object.
            throw new IOException("IO driver mismatch: SwiftAccessIO called on a non-swift stored object.");
        } else if (this.isWriteAccess) {
            // A new object: build its location from the default endpoint and the owning dataset.
            Properties p = getSwiftProperties();
            swiftEndPoint = p.getProperty("swift.default.endpoint");
            String swiftFolderPathSeparator = "-";
            String authorityNoSlashes = owner.getAuthority().replace(owner.getDoiSeparator(), swiftFolderPathSeparator);
            // swiftFolderPath is not declared in this method (presumably an instance field).
            swiftFolderPath = owner.getProtocol() + swiftFolderPathSeparator + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + swiftFolderPathSeparator + owner.getIdentifier();
            swiftFileName = storageIdentifier;
            // The dvObject now carries the full swift location.
            dvObject.setStorageIdentifier("swift://" + swiftEndPoint + ":" + swiftFolderPath + ":" + swiftFileName);
        } else {
            throw new IOException("SwiftAccessIO: unknown access mode.");
        }
    } else if (dvObject instanceof Dataset) {
        Dataset dataset = this.getDataset();
        if (storageIdentifier.startsWith("swift://")) {
            // This is a call on an already existing swift object.
            // TODO: determine how storage identifier will give us info
            String[] swiftStorageTokens = storageIdentifier.substring(8).split(":", 3);
            // number of tokens should be two because there is no main file
            if (swiftStorageTokens.length != 2) {
                // bad storage identifier
                throw new IOException("SwiftAccessIO: invalid swift storage token: " + storageIdentifier);
            }
            swiftEndPoint = swiftStorageTokens[0];
            swiftContainerName = swiftStorageTokens[1];
            // We will not have a file name, just an aux tag
            if (auxItemTag != null) {
                swiftFileName = auxItemTag;
            } else {
                throw new IOException("Dataset related auxillary files require an auxItemTag");
            }
            if (StringUtil.isEmpty(swiftEndPoint) || StringUtil.isEmpty(swiftContainerName) || StringUtil.isEmpty(swiftFileName)) {
                // every component must be non-empty for the identifier to be usable
                throw new IOException("SwiftAccessIO: invalid swift storage token: " + storageIdentifier);
            }
        } else if (this.isReadAccess) {
            // Read access was requested, but the identifier does not point to a swift object.
            throw new IOException("IO driver mismatch: SwiftAccessIO called on a non-swift stored object.");
        } else if (this.isWriteAccess) {
            // A new dataset-level location: default endpoint plus a folder path derived from the dataset.
            Properties p = getSwiftProperties();
            swiftEndPoint = p.getProperty("swift.default.endpoint");
            String swiftFolderPathSeparator = "-";
            String authorityNoSlashes = dataset.getAuthority().replace(dataset.getDoiSeparator(), swiftFolderPathSeparator);
            swiftFolderPath = dataset.getProtocol() + swiftFolderPathSeparator + authorityNoSlashes.replace(".", swiftFolderPathSeparator) + swiftFolderPathSeparator + dataset.getIdentifier();
            // NOTE(review): auxItemTag is not null-checked on this path, unlike the existing-object path above.
            swiftFileName = auxItemTag;
            dvObject.setStorageIdentifier("swift://" + swiftEndPoint + ":" + swiftFolderPath);
        } else {
            throw new IOException("SwiftAccessIO: unknown access mode.");
        }
    } else {
        // for future scope, if dataverse is decided to be stored in swift storage containers
        throw new FileNotFoundException("Error initializing swift object");
    }
    if (this.account == null) {
        account = authenticateWithSwift(swiftEndPoint);
    }
    /*
        The containers created is swiftEndPoint concatenated with the swiftContainerName
        property. Creating container with certain names throws 'Unable to create
        container' error on Openstack.
        Any datafile with http://rdgw storage identifier i.e present on Object
        store service endpoint already only needs to look-up for container using
        just swiftContainerName which is the concatenated name.
        In future, a container for the endpoint can be created and for every
        other swiftContainerName Object Store pseudo-folder can be created, which is
        not provide by the joss Java swift library as of yet.
     */
    // An existing swift object lives in the container named by its identifier (which must
    // already exist); a new object goes into the container named after the folder path.
    String containerToOpen = storageIdentifier.startsWith("swift://") ? swiftContainerName : swiftFolderPath;
    this.swiftContainer = account.getContainer(containerToOpen);
    if (!this.swiftContainer.exists()) {
        if (writeAccess) {
            // creates a private data container (it is not made public)
            swiftContainer.create();
        } else {
            // Cannot read from a container that does not exist. Report the name that was
            // actually looked up, so the message never shows "null" for a new object.
            throw new IOException("SwiftAccessIO: container " + containerToOpen + " does not exist.");
        }
    }
    fileObject = this.swiftContainer.getObject(swiftFileName);
    // For the primary object of a DataFile (no aux tag), also record the download URL here:
    if (auxItemTag == null && dvObject instanceof DataFile) {
        setRemoteUrl(getSwiftFileURI(fileObject));
        if (!this.isWriteAccess && !this.getDataFile().isIngestInProgress()) {
            // otherwise this gets called a bunch on upload
            setTemporarySwiftUrl(generateTemporarySwiftUrl(swiftEndPoint, swiftContainerName, swiftFileName, TEMP_URL_EXPIRES));
            setTempUrlSignature(generateTempUrlSignature(swiftEndPoint, swiftContainerName, swiftFileName, TEMP_URL_EXPIRES));
            setTempUrlExpiry(generateTempUrlExpiry(TEMP_URL_EXPIRES, System.currentTimeMillis()));
        }
        setSwiftFileName(swiftFileName);
        logger.fine(getRemoteUrl() + " success; write mode: " + writeAccess);
    } else {
        logger.fine("sucessfully opened AUX object " + auxItemTag + " , write mode: " + writeAccess);
    }
    if (!writeAccess && !fileObject.exists()) {
        throw new FileNotFoundException("SwiftAccessIO: DvObject " + swiftFileName + " does not exist (Dataverse dvObject id: " + dvObject.getId() + ")");
    }
    return fileObject;
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) StoredObject(org.javaswift.joss.model.StoredObject) Dataset(edu.harvard.iq.dataverse.Dataset) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) Properties(java.util.Properties)

Example 43 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class DatasetUtil method attemptToAutomaticallySelectThumbnailFromDataFiles.

/**
 * Picks the first data file of a dataset version that can serve as the
 * dataset thumbnail.
 *
 * A file qualifies when thumbnails are supported for it and a thumbnail of
 * the default card-image size is available. Files are examined in the order
 * of the version's file metadatas.
 *
 * @param dataset the dataset to find a thumbnail for; {@code null} yields {@code null}
 * @param datasetVersion the version whose files are examined; when
 *                       {@code null}, the dataset's latest version is used
 * @return the first qualifying data file, or {@code null} when the dataset is
 *         {@code null}, is configured to use the generic thumbnail, or has no
 *         qualifying file
 */
public static DataFile attemptToAutomaticallySelectThumbnailFromDataFiles(Dataset dataset, DatasetVersion datasetVersion) {
    if (dataset == null) {
        return null;
    }
    if (dataset.isUseGenericThumbnail()) {
        logger.fine("Bypassing logic to find a thumbnail because a generic icon for the dataset is desired.");
        return null;
    }
    // Fall back to the latest version when the caller did not supply one.
    DatasetVersion versionToScan = datasetVersion;
    if (versionToScan == null) {
        logger.fine("getting latest version of dataset");
        versionToScan = dataset.getLatestVersion();
    }
    for (FileMetadata metadata : versionToScan.getFileMetadatas()) {
        DataFile candidate = metadata.getDataFile();
        if (!FileUtil.isThumbnailSupported(candidate)) {
            continue;
        }
        if (!ImageThumbConverter.isThumbnailAvailable(candidate, ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE)) {
            continue;
        }
        // First file that passes both checks wins.
        return candidate;
    }
    logger.fine("In attemptToAutomaticallySelectThumbnailFromDataFiles and interated through all the files but couldn't find a thumbnail.");
    return null;
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) FileMetadata(edu.harvard.iq.dataverse.FileMetadata)

Example 44 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class DestroyDatasetCommand method executeImpl.

/**
 * Destroys the dataset {@code doomed}: its files, role assignments, roles,
 * persistent identifier, database record and search index entries.
 *
 * Steps, in order: permission check, reset of the dedicated thumbnail,
 * merge of the dataset into the persistence context, deletion of every data
 * file through {@code DeleteDataFileCommand}, removal of the dataset logo,
 * removal of role assignments and roles, best-effort deletion of the
 * identifier, removal of the dataset entity, deletion of the collected Solr
 * ids, and re-indexing of the owning dataverse.
 *
 * @param ctxt the command context giving access to the entity manager, the
 *             command engine, and the role, index and Solr services
 * @throws CommandException declared by the command contract; a
 *                          {@code PermissionException} is thrown when a
 *                          released dataset is destroyed by anyone other
 *                          than an authenticated superuser
 */
@Override
protected void executeImpl(CommandContext ctxt) throws CommandException {
    // first check if dataset is released, and if so, if user is a superuser
    if (doomed.isReleased() && (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser())) {
        throw new PermissionException("Destroy can only be called by superusers.", this, Collections.singleton(Permission.DeleteDatasetDraft), doomed);
    }
    // If there is a dedicated thumbnail DataFile, it needs to be reset
    // explicitly, or we'll get a constraint violation when deleting:
    doomed.setThumbnailFile(null);
    final Dataset managedDoomed = ctxt.em().merge(doomed);
    List<String> datasetAndFileSolrIdsToDelete = new ArrayList<>();
    // files need to iterate through and remove 'by hand' to avoid
    // optimistic lock issues... (plus the physical files need to be
    // deleted too!)
    Iterator<DataFile> dfIt = doomed.getFiles().iterator();
    while (dfIt.hasNext()) {
        DataFile df = dfIt.next();
        // Gather potential Solr IDs of files. As of this writing deaccessioned files are never indexed.
        String solrIdOfPublishedFile = IndexServiceBean.solrDocIdentifierFile + df.getId();
        datasetAndFileSolrIdsToDelete.add(solrIdOfPublishedFile);
        String solrIdOfDraftFile = IndexServiceBean.solrDocIdentifierFile + df.getId() + IndexServiceBean.draftSuffix;
        datasetAndFileSolrIdsToDelete.add(solrIdOfDraftFile);
        ctxt.engine().submit(new DeleteDataFileCommand(df, getRequest(), true));
        // Remove through the iterator so the list is not modified behind its back.
        dfIt.remove();
    }
    // also, lets delete the uploaded thumbnails!
    deleteDatasetLogo(doomed);
    // ASSIGNMENTS
    for (RoleAssignment ra : ctxt.roles().directRoleAssignments(doomed)) {
        ctxt.em().remove(ra);
    }
    // ROLES
    for (DataverseRole ra : ctxt.roles().findByOwnerId(doomed.getId())) {
        ctxt.em().remove(ra);
    }
    // Identifier deletion is best effort: a failure is logged and the destroy continues.
    IdServiceBean idServiceBean = IdServiceBean.getBean(ctxt);
    try {
        if (idServiceBean.alreadyExists(doomed)) {
            idServiceBean.deleteIdentifier(doomed);
        }
    } catch (Exception e) {
        // Pass the exception itself: the message has no {0} placeholder, so a
        // plain parameter would never show up in the log output.
        logger.log(Level.WARNING, "Identifier deletion was not successfull:", e);
    }
    // Remember the owner before the dataset entity is removed, so it can be re-indexed.
    Dataverse toReIndex = managedDoomed.getOwner();
    // dataset
    ctxt.em().remove(managedDoomed);
    // add potential Solr IDs of datasets to list for deletion
    String solrIdOfPublishedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId();
    datasetAndFileSolrIdsToDelete.add(solrIdOfPublishedDatasetVersion);
    String solrIdOfDraftDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId() + IndexServiceBean.draftSuffix;
    datasetAndFileSolrIdsToDelete.add(solrIdOfDraftDatasetVersion);
    String solrIdOfDeaccessionedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId() + IndexServiceBean.deaccessionedSuffix;
    datasetAndFileSolrIdsToDelete.add(solrIdOfDeaccessionedDatasetVersion);
    IndexResponse resultOfSolrDeletionAttempt = ctxt.solrIndex().deleteMultipleSolrIds(datasetAndFileSolrIdsToDelete);
    logger.log(Level.FINE, "Result of attempt to delete dataset and file IDs from the search index: {0}", resultOfSolrDeletionAttempt.getMessage());
    ctxt.index().indexDataverse(toReIndex);
}
Also used : PermissionException(edu.harvard.iq.dataverse.engine.command.exception.PermissionException) Dataset(edu.harvard.iq.dataverse.Dataset) RoleAssignment(edu.harvard.iq.dataverse.RoleAssignment) ArrayList(java.util.ArrayList) AuthenticatedUser(edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser) Dataverse(edu.harvard.iq.dataverse.Dataverse) PermissionException(edu.harvard.iq.dataverse.engine.command.exception.PermissionException) CommandException(edu.harvard.iq.dataverse.engine.command.exception.CommandException) DataverseRole(edu.harvard.iq.dataverse.authorization.DataverseRole) DataFile(edu.harvard.iq.dataverse.DataFile) IndexResponse(edu.harvard.iq.dataverse.search.IndexResponse) IdServiceBean(edu.harvard.iq.dataverse.IdServiceBean)

Example 45 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class IngestServiceBean method addFiles.

/**
 * Tries to permanently store newly uploaded files in their final storage
 * location. It should be called before the files are permanently saved in
 * the database by calling the Save command on the dataset and/or version.
 *
 * For each file this method: attaches it to the dataset and version if it
 * has no owner yet; marks tabular files as scheduled for ingest, or attempts
 * metadata extraction for extractable files; copies the temp file to
 * permanent storage and records the resulting size; stores generated
 * thumbnail files as auxiliary objects (only when the main save succeeded);
 * and finally deletes the temp file.
 *
 * Failures to save or delete individual files are logged and do not stop
 * the processing of the remaining files.
 *
 * TODO: rename the method finalizeFiles()? or something like that?
 *
 * @param version  the dataset version the files belong to
 * @param newFiles the new files to store; nothing happens when it is
 *                 {@code null} or empty
 */
public void addFiles(DatasetVersion version, List<DataFile> newFiles) {
    if (newFiles == null || newFiles.isEmpty()) {
        return;
    }
    // final check for duplicate file names;
    // we tried to make the file names unique on upload, but then
    // the user may have edited them on the "add files" page, and
    // renamed FOOBAR-1.txt back to FOOBAR.txt...
    IngestUtil.checkForDuplicateFileNamesFinal(version, newFiles);
    Dataset dataset = version.getDataset();
    for (DataFile dataFile : newFiles) {
        // The storage identifier may carry a "tmp://" prefix; strip it once and use the
        // same temp location both for metadata extraction and for the permanent save.
        String storageId = dataFile.getStorageIdentifier().replaceFirst("^tmp://", "");
        String tempFileLocation = FileUtil.getFilesTempDirectory() + "/" + storageId;
        Path tempLocationPath = Paths.get(tempFileLocation);
        // These are all brand new files, so they should all have
        // one filemetadata total. -- L.A.
        FileMetadata fileMetadata = dataFile.getFileMetadatas().get(0);
        String fileName = fileMetadata.getLabel();
        // Attach the file to the dataset and version, if that hasn't been done yet:
        if (dataFile.getOwner() == null) {
            dataFile.setOwner(dataset);
            version.getFileMetadatas().add(dataFile.getFileMetadata());
            dataFile.getFileMetadata().setDatasetVersion(version);
            dataset.getFiles().add(dataFile);
        }
        boolean metadataExtracted = false;
        if (FileUtil.ingestableAsTabular(dataFile)) {
            /*
             * Note that we don't try to ingest the file right away -
             * instead we mark it as "scheduled for ingest", then at
             * the end of the save process it will be queued for async.
             * ingest in the background. In the meantime, the file
             * will be ingested as a regular, non-tabular file, and
             * appear as such to the user, until the ingest job is
             * finished with the Ingest Service.
             */
            dataFile.SetIngestScheduled();
        } else if (fileMetadataExtractable(dataFile)) {
            try {
                // FITS is the only type supported for metadata
                // extraction, as of now. -- L.A. 4.0
                dataFile.setContentType("application/fits");
                metadataExtracted = extractMetadata(tempFileLocation, dataFile, version);
            } catch (IOException mex) {
                logger.severe("Caught exception trying to extract indexable metadata from file " + fileName + ",  " + mex.getMessage());
            }
            if (metadataExtracted) {
                logger.fine("Successfully extracted indexable metadata from file " + fileName);
            } else {
                logger.fine("Failed to extract indexable metadata from file " + fileName);
            }
        }
        // Try to save the file in its permanent location:
        boolean savedSuccess = false;
        StorageIO<DataFile> dataAccess = null;
        try {
            logger.fine("Attempting to create a new storageIO object for " + storageId);
            dataAccess = DataAccess.createNewStorageIO(dataFile, storageId);
            logger.fine("Successfully created a new storageIO object.");
            // savePath() is the convenience method for copying the temp file into the
            // storage behind this StorageIO object.
            dataAccess.savePath(tempLocationPath);
            // Set filesize in bytes
            dataFile.setFilesize(dataAccess.getSize());
            savedSuccess = true;
            logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel());
        } catch (IOException ioex) {
            logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")");
        }
        // Since we may have already spent some CPU cycles scaling down image thumbnails,
        // we may as well save them, by moving these generated images to the permanent
        // dataset directory. We should also remember to delete any such files in the
        // temp directory:
        List<Path> generatedTempFiles = listGeneratedTempFiles(Paths.get(FileUtil.getFilesTempDirectory()), storageId);
        // Generated files are only handled (saved AND deleted) when the main file was saved.
        if (savedSuccess && generatedTempFiles != null) {
            for (Path generated : generatedTempFiles) {
                logger.fine("(Will also try to permanently save generated thumbnail file " + generated.toString() + ")");
                try {
                    // Everything from the last "thumb" onward is used as the aux tag.
                    int i = generated.toString().lastIndexOf("thumb");
                    if (i > 1) {
                        String extensionTag = generated.toString().substring(i);
                        dataAccess.savePathAsAux(generated, extensionTag);
                        logger.fine("Saved generated thumbnail as aux object. \"preview available\" status: " + dataFile.isPreviewImageAvailable());
                    } else {
                        logger.warning("Generated thumbnail file name does not match the expected pattern: " + generated.toString());
                    }
                } catch (IOException ioex) {
                    logger.warning("Failed to save generated file " + generated.toString());
                }
                try {
                    Files.delete(generated);
                } catch (IOException ioex) {
                    logger.warning("Failed to delete generated file " + generated.toString());
                }
            }
        }
        // NOTE(review): the temp file is deleted even when the permanent save failed - confirm this is intended.
        try {
            logger.fine("Will attempt to delete the temp file " + tempLocationPath.toString());
            Files.delete(tempLocationPath);
        } catch (IOException ex) {
            // (non-fatal - it's just a temp file.)
            logger.warning("Failed to delete temp file " + tempLocationPath.toString());
        }
    }
    logger.fine("Done! Finished saving new files in permanent storage.");
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) Path(java.nio.file.Path) Dataset(edu.harvard.iq.dataverse.Dataset) FileChannel(java.nio.channels.FileChannel) FileMetadata(edu.harvard.iq.dataverse.FileMetadata) WritableByteChannel(java.nio.channels.WritableByteChannel) IOException(java.io.IOException)

Aggregations

DataFile (edu.harvard.iq.dataverse.DataFile) 111; Dataset (edu.harvard.iq.dataverse.Dataset) 39; IOException (java.io.IOException) 39; FileMetadata (edu.harvard.iq.dataverse.FileMetadata) 30; ArrayList (java.util.ArrayList) 25; DatasetVersion (edu.harvard.iq.dataverse.DatasetVersion) 20; File (java.io.File) 20; FileNotFoundException (java.io.FileNotFoundException) 18; Path (javax.ws.rs.Path) 18; Dataverse (edu.harvard.iq.dataverse.Dataverse) 17; FileInputStream (java.io.FileInputStream) 16; AuthenticatedUser (edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser) 14; CommandException (edu.harvard.iq.dataverse.engine.command.exception.CommandException) 13; Date (java.util.Date) 13; GET (javax.ws.rs.GET) 13; Test (org.junit.Test) 13; Timestamp (java.sql.Timestamp) 11; InputStream (java.io.InputStream) 10; DataVariable (edu.harvard.iq.dataverse.datavariable.DataVariable) 8; FileOutputStream (java.io.FileOutputStream) 8