Example 21 with FileMetadata

use of edu.harvard.iq.dataverse.FileMetadata in project dataverse by IQSS.

the class DatasetUtil method attemptToAutomaticallySelectThumbnailFromDataFiles.

/**
 * Attempts to pick a thumbnail candidate from the dataset's data files.
 * Pass an optional datasetVersion in case the file system is checked.
 *
 * @param dataset the dataset a thumbnail is being selected for
 * @param datasetVersion the version whose files should be considered; if null, the latest version is used
 * @return a DataFile usable as a thumbnail, or null if none is found (or a generic icon is preferred)
 */
public static DataFile attemptToAutomaticallySelectThumbnailFromDataFiles(Dataset dataset, DatasetVersion datasetVersion) {
    if (dataset == null) {
        return null;
    }
    if (dataset.isUseGenericThumbnail()) {
        logger.fine("Bypassing logic to find a thumbnail because a generic icon for the dataset is desired.");
        return null;
    }
    if (datasetVersion == null) {
        logger.fine("getting latest version of dataset");
        datasetVersion = dataset.getLatestVersion();
    }
    for (FileMetadata fmd : datasetVersion.getFileMetadatas()) {
        DataFile testFile = fmd.getDataFile();
        if (FileUtil.isThumbnailSupported(testFile) && ImageThumbConverter.isThumbnailAvailable(testFile, ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE)) {
            return testFile;
        }
    }
    logger.fine("In attemptToAutomaticallySelectThumbnailFromDataFiles and iterated through all the files but couldn't find a thumbnail.");
    return null;
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) FileMetadata(edu.harvard.iq.dataverse.FileMetadata)
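
A minimal caller sketch for the method above, not taken from the Dataverse sources: it asks for a thumbnail candidate and logs the result, passing null for the version so that the method falls back to dataset.getLatestVersion(). The helper class name and the import path for DatasetUtil are assumptions made for illustration.

import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.Dataset;
// assumed package for DatasetUtil
import edu.harvard.iq.dataverse.dataset.DatasetUtil;
import java.util.logging.Logger;

// Hypothetical helper, not part of the Dataverse sources.
public class ThumbnailProbe {

    private static final Logger logger = Logger.getLogger(ThumbnailProbe.class.getName());

    public static void logThumbnailCandidate(Dataset dataset) {
        // Passing null makes the method fall back to dataset.getLatestVersion():
        DataFile candidate = DatasetUtil.attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null);
        if (candidate != null) {
            logger.info("Thumbnail candidate: " + candidate.getFileMetadata().getLabel());
        } else {
            logger.info("No thumbnail candidate (generic icon requested, or no suitable files).");
        }
    }
}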

Example 22 with FileMetadata

use of edu.harvard.iq.dataverse.FileMetadata in project dataverse by IQSS.

the class IngestServiceBean method addFiles.

// This method tries to permanently store the files on the filesystem.
// It should be called before we attempt to permanently save the files in
// the database by calling the Save command on the dataset and/or version.
// TODO: rename the method finalizeFiles()? or something like that?
public void addFiles(DatasetVersion version, List<DataFile> newFiles) {
    if (newFiles != null && newFiles.size() > 0) {
        // final check for duplicate file names;
        // we tried to make the file names unique on upload, but then
        // the user may have edited them on the "add files" page, and
        // renamed FOOBAR-1.txt back to FOOBAR.txt...
        IngestUtil.checkForDuplicateFileNamesFinal(version, newFiles);
        Dataset dataset = version.getDataset();
        for (DataFile dataFile : newFiles) {
            String tempFileLocation = FileUtil.getFilesTempDirectory() + "/" + dataFile.getStorageIdentifier();
            // These are all brand new files, so they should all have
            // one filemetadata total. -- L.A.
            FileMetadata fileMetadata = dataFile.getFileMetadatas().get(0);
            String fileName = fileMetadata.getLabel();
            // Attach the file to the dataset and the version, if that hasn't been done yet:
            if (dataFile.getOwner() == null) {
                dataFile.setOwner(dataset);
                version.getFileMetadatas().add(dataFile.getFileMetadata());
                dataFile.getFileMetadata().setDatasetVersion(version);
                dataset.getFiles().add(dataFile);
            }
            boolean metadataExtracted = false;
            if (FileUtil.ingestableAsTabular(dataFile)) {
                /*
                 * Note that we don't try to ingest the file right away -
                 * instead we mark it as "scheduled for ingest", then at
                 * the end of the save process it will be queued for async.
                 * ingest in the background. In the meantime, the file
                 * will be ingested as a regular, non-tabular file, and
                 * appear as such to the user, until the ingest job is
                 * finished with the Ingest Service.
                 */
                dataFile.SetIngestScheduled();
            } else if (fileMetadataExtractable(dataFile)) {
                try {
                    // FITS is the only type supported for metadata
                    // extraction, as of now. -- L.A. 4.0
                    dataFile.setContentType("application/fits");
                    metadataExtracted = extractMetadata(tempFileLocation, dataFile, version);
                } catch (IOException mex) {
                    logger.severe("Caught exception trying to extract indexable metadata from file " + fileName + ",  " + mex.getMessage());
                }
                if (metadataExtracted) {
                    logger.fine("Successfully extracted indexable metadata from file " + fileName);
                } else {
                    logger.fine("Failed to extract indexable metadata from file " + fileName);
                }
            }
            // Try to save the file in its permanent location:
            String storageId = dataFile.getStorageIdentifier().replaceFirst("^tmp://", "");
            Path tempLocationPath = Paths.get(FileUtil.getFilesTempDirectory() + "/" + storageId);
            WritableByteChannel writeChannel = null;
            FileChannel readChannel = null;
            boolean localFile = false;
            boolean savedSuccess = false;
            StorageIO<DataFile> dataAccess = null;
            try {
                logger.fine("Attempting to create a new storageIO object for " + storageId);
                dataAccess = DataAccess.createNewStorageIO(dataFile, storageId);
                if (dataAccess.isLocalFile()) {
                    localFile = true;
                }
                logger.fine("Successfully created a new storageIO object.");
                /*
                 * This commented-out code demonstrates how to copy bytes
                 * from a local InputStream (or a readChannel) into the
                 * writable byte channel of a Dataverse DataAccessIO object:
                 */
                /*
                storageIO.open(DataAccessOption.WRITE_ACCESS);

                writeChannel = storageIO.getWriteChannel();
                readChannel = new FileInputStream(tempLocationPath.toFile()).getChannel();

                long bytesPerIteration = 16 * 1024; // 16K bytes
                long start = 0;
                while ( start < readChannel.size() ) {
                    readChannel.transferTo(start, bytesPerIteration, writeChannel);
                    start += bytesPerIteration;
                }
                */
                /*
                 * But it's easier to use this convenience method from the
                 * DataAccessIO:
                 *
                 * (if the underlying storage method for this file is
                 * local filesystem, the DataAccessIO will simply copy
                 * the file using Files.copy, like this:
                 *
                 * Files.copy(tempLocationPath, storageIO.getFileSystemLocation(), StandardCopyOption.REPLACE_EXISTING);
                 */
                dataAccess.savePath(tempLocationPath);
                // Set filesize in bytes:
                dataFile.setFilesize(dataAccess.getSize());
                savedSuccess = true;
                logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel());
            } catch (IOException ioex) {
                logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")");
            } finally {
                if (readChannel != null) {
                    try {
                        readChannel.close();
                    } catch (IOException e) {
                        // ignore - failing to close the read channel here is not fatal
                    }
                }
                if (writeChannel != null) {
                    try {
                        writeChannel.close();
                    } catch (IOException e) {
                        // ignore - failing to close the write channel here is not fatal
                    }
                }
            }
            // Since we may have already spent some CPU cycles scaling down image thumbnails,
            // we may as well save them, by moving these generated images to the permanent
            // dataset directory. We should also remember to delete any such files in the
            // temp directory:
            List<Path> generatedTempFiles = listGeneratedTempFiles(Paths.get(FileUtil.getFilesTempDirectory()), storageId);
            if (generatedTempFiles != null) {
                for (Path generated : generatedTempFiles) {
                    if (savedSuccess) {
                        // && localFile) {
                        logger.fine("(Will also try to permanently save generated thumbnail file " + generated.toString() + ")");
                        try {
                            // Files.copy(generated, Paths.get(dataset.getFileSystemDirectory().toString(), generated.getFileName().toString()));
                            int i = generated.toString().lastIndexOf("thumb");
                            if (i > 1) {
                                String extensionTag = generated.toString().substring(i);
                                dataAccess.savePathAsAux(generated, extensionTag);
                                logger.fine("Saved generated thumbnail as aux object. \"preview available\" status: " + dataFile.isPreviewImageAvailable());
                            } else {
                                logger.warning("Generated thumbnail file name does not match the expected pattern: " + generated.toString());
                            }
                        } catch (IOException ioex) {
                            logger.warning("Failed to save generated file " + generated.toString());
                        }
                        try {
                            Files.delete(generated);
                        } catch (IOException ioex) {
                            logger.warning("Failed to delete generated file " + generated.toString());
                        }
                    }
                }
            }
            try {
                logger.fine("Will attempt to delete the temp file " + tempLocationPath.toString());
                Files.delete(tempLocationPath);
            } catch (IOException ex) {
                // (non-fatal - it's just a temp file.)
                logger.warning("Failed to delete temp file " + tempLocationPath.toString());
            }
        // Any necessary post-processing:
        // performPostProcessingTasks(dataFile);
        }
        logger.fine("Done! Finished saving new files in permanent storage.");
    }
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) Path(java.nio.file.Path) Dataset(edu.harvard.iq.dataverse.Dataset) FileChannel(java.nio.channels.FileChannel) FileMetadata(edu.harvard.iq.dataverse.FileMetadata) WritableByteChannel(java.nio.channels.WritableByteChannel) IOException(java.io.IOException)
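
The channel-to-channel copy that is only sketched in comments inside addFiles() above can be exercised on its own with plain java.nio. The class below is a standalone illustration, not part of the Dataverse sources; the file paths in main() are made up for the example.

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.channels.WritableByteChannel;
import java.nio.file.Path;
import java.nio.file.Paths;

// Standalone sketch of the commented-out channel copy shown in addFiles() above.
public class ChannelCopyDemo {

    public static void copyInChunks(Path source, Path destination) throws IOException {
        try (FileChannel readChannel = new FileInputStream(source.toFile()).getChannel();
             WritableByteChannel writeChannel = new FileOutputStream(destination.toFile()).getChannel()) {
            // 16K bytes per iteration, as in the snippet above
            long bytesPerIteration = 16 * 1024;
            long start = 0;
            while (start < readChannel.size()) {
                readChannel.transferTo(start, bytesPerIteration, writeChannel);
                start += bytesPerIteration;
            }
        }
    }

    public static void main(String[] args) throws IOException {
        // Made-up paths for illustration only:
        copyInChunks(Paths.get("/tmp/FOOBAR.txt"), Paths.get("/tmp/FOOBAR-copy.txt"));
    }
}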

Example 23 with FileMetadata

use of edu.harvard.iq.dataverse.FileMetadata in project dataverse by IQSS.

the class IngestServiceBean method extractMetadata.

/*
 * extractMetadata:
 * framework for extracting metadata from uploaded files. The results will
 * be used to populate the metadata of the Dataset to which the file belongs.
 */
public boolean extractMetadata(String tempFileLocation, DataFile dataFile, DatasetVersion editVersion) throws IOException {
    boolean ingestSuccessful = false;
    FileInputStream tempFileInputStream = null;
    try {
        tempFileInputStream = new FileInputStream(new File(tempFileLocation));
    } catch (FileNotFoundException notfoundEx) {
        throw new IOException("Could not open temp file " + tempFileLocation);
    }
    // Locate metadata extraction plugin for the file format by looking
    // it up with the Ingest Service Provider Registry:
    // FileMetadataExtractor extractorPlugin = IngestSP.getMetadataExtractorByMIMEType(dfile.getContentType());
    FileMetadataExtractor extractorPlugin = new FITSFileMetadataExtractor();
    FileMetadataIngest extractedMetadata = extractorPlugin.ingest(new BufferedInputStream(tempFileInputStream));
    Map<String, Set<String>> extractedMetadataMap = extractedMetadata.getMetadataMap();
    // Store the fields and values we've gathered for safe-keeping:
    // from 3.6:
    // attempt to ingest the extracted metadata into the database;
    // TODO: this should throw an exception if anything goes wrong.
    FileMetadata fileMetadata = dataFile.getFileMetadata();
    if (extractedMetadataMap != null) {
        logger.fine("Ingest Service: Processing extracted metadata;");
        if (extractedMetadata.getMetadataBlockName() != null) {
            logger.fine("Ingest Service: This metadata belongs to the " + extractedMetadata.getMetadataBlockName() + " metadata block.");
            processDatasetMetadata(extractedMetadata, editVersion);
        }
        processFileLevelMetadata(extractedMetadata, fileMetadata);
    }
    ingestSuccessful = true;
    return ingestSuccessful;
}
Also used : Set(java.util.Set) LinkedHashSet(java.util.LinkedHashSet) FileMetadataIngest(edu.harvard.iq.dataverse.ingest.metadataextraction.FileMetadataIngest) FileNotFoundException(java.io.FileNotFoundException) FileMetadata(edu.harvard.iq.dataverse.FileMetadata) FileMetadataExtractor(edu.harvard.iq.dataverse.ingest.metadataextraction.FileMetadataExtractor) FITSFileMetadataExtractor(edu.harvard.iq.dataverse.ingest.metadataextraction.impl.plugins.fits.FITSFileMetadataExtractor) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) FITSFileMetadataExtractor(edu.harvard.iq.dataverse.ingest.metadataextraction.impl.plugins.fits.FITSFileMetadataExtractor) BufferedInputStream(java.io.BufferedInputStream) DataFile(edu.harvard.iq.dataverse.DataFile) File(java.io.File)
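
To experiment with the FITS extractor outside the service bean, a small command-line probe like the one below can run the plugin directly and print what it finds. This is a hypothetical sketch, not part of the Dataverse sources; it uses only the extractor calls visible in extractMetadata() above, and the default file name "sample.fits" is an assumption.

import edu.harvard.iq.dataverse.ingest.metadataextraction.FileMetadataExtractor;
import edu.harvard.iq.dataverse.ingest.metadataextraction.FileMetadataIngest;
import edu.harvard.iq.dataverse.ingest.metadataextraction.impl.plugins.fits.FITSFileMetadataExtractor;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Map;
import java.util.Set;

// Hypothetical command-line probe, not part of the Dataverse sources.
public class FitsMetadataProbe {

    public static void main(String[] args) throws IOException {
        String path = args.length > 0 ? args[0] : "sample.fits"; // assumed test file
        FileMetadataExtractor extractorPlugin = new FITSFileMetadataExtractor();
        FileMetadataIngest extracted = extractorPlugin.ingest(new BufferedInputStream(new FileInputStream(path)));
        System.out.println("Metadata block: " + extracted.getMetadataBlockName());
        Map<String, Set<String>> metadataMap = extracted.getMetadataMap();
        if (metadataMap != null) {
            for (Map.Entry<String, Set<String>> entry : metadataMap.entrySet()) {
                System.out.println(entry.getKey() + " = " + entry.getValue());
            }
        }
    }
}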

Example 24 with FileMetadata

use of edu.harvard.iq.dataverse.FileMetadata in project dataverse by IQSS.

the class IngestUtil method existingPathNamesAsSet.

private static Set<String> existingPathNamesAsSet(DatasetVersion version, FileMetadata fileMetadata) {
    Set<String> pathNamesExisting = new HashSet<>();
    // Iterate over the file metadatas already in the version, skipping the one
    // passed in as fileMetadata (if any), and collect their path names:
    for (Iterator<FileMetadata> fmIt = version.getFileMetadatas().iterator(); fmIt.hasNext(); ) {
        FileMetadata fm = fmIt.next();
        if (fm.getId() != null && (fileMetadata == null || !fm.getId().equals(fileMetadata.getId()))) {
            String existingName = fm.getLabel();
            String existingDir = fm.getDirectoryLabel();
            String existingPath = makePathName(existingDir, existingName);
            if (!existingPath.isEmpty()) {
                pathNamesExisting.add(existingPath);
                // For tabular files, also add the original (pre-ingest) path, so we don't miss a
                // match. e.g. stata file foobar.dta becomes foobar.tab once ingested!
                if (fm.getDataFile().isTabularData()) {
                    String originalPath;
                    String originalMimeType = fm.getDataFile().getDataTable().getOriginalFileFormat();
                    if (originalMimeType != null) {
                        String origFileExtension = FileUtil.generateOriginalExtension(originalMimeType);
                        originalPath = existingPath.replaceAll(".tab$", origFileExtension);
                    } else {
                        originalPath = existingPath.replaceAll(".tab$", "");
                    }
                    pathNamesExisting.add(originalPath);
                }
            }
        }
    }
    return pathNamesExisting;
}
Also used : FileMetadata(edu.harvard.iq.dataverse.FileMetadata) HashSet(java.util.HashSet)
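
The only non-obvious step above is the extra path added for tabular files, so that a re-upload of the original file is still detected as a duplicate. The tiny standalone sketch below, not taken from the Dataverse sources, shows the same substitution with a hard-coded ".dta" standing in for whatever FileUtil.generateOriginalExtension() would return; that extension value is an assumption for illustration.

import java.util.HashSet;
import java.util.Set;

// Standalone illustration of the ".tab" -> original-extension rewriting done above.
public class OriginalPathDemo {

    public static void main(String[] args) {
        Set<String> pathNamesExisting = new HashSet<>();
        String existingPath = "data/foobar.tab"; // path as stored after ingest
        String origFileExtension = ".dta";       // assumed stand-in for FileUtil.generateOriginalExtension()
        pathNamesExisting.add(existingPath);
        pathNamesExisting.add(existingPath.replaceAll(".tab$", origFileExtension));
        // Prints both data/foobar.tab and data/foobar.dta (set order is not guaranteed):
        System.out.println(pathNamesExisting);
    }
}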

Example 25 with FileMetadata

use of edu.harvard.iq.dataverse.FileMetadata in project dataverse by IQSS.

the class IngestUtil method getUnfValuesOfFiles.

public static List<String> getUnfValuesOfFiles(DatasetVersion version) {
    List<String> unfValueList = new ArrayList<>();
    if (version == null) {
        return unfValueList;
    }
    Iterator<FileMetadata> itfm = version.getFileMetadatas().iterator();
    while (itfm.hasNext()) {
        FileMetadata fileMetadata = itfm.next();
        if (fileMetadata != null && fileMetadata.getDataFile() != null && fileMetadata.getDataFile().isTabularData() && fileMetadata.getDataFile().getUnf() != null) {
            String varunf = fileMetadata.getDataFile().getUnf();
            unfValueList.add(varunf);
        }
    }
    return unfValueList;
}
Also used : ArrayList(java.util.ArrayList) FileMetadata(edu.harvard.iq.dataverse.FileMetadata)
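
A hypothetical caller for getUnfValuesOfFiles(), not part of the Dataverse sources: it logs how many UNF values the tabular files in a version contribute. The import path for IngestUtil is an assumption.

import edu.harvard.iq.dataverse.DatasetVersion;
// assumed package for IngestUtil
import edu.harvard.iq.dataverse.ingest.IngestUtil;
import java.util.List;
import java.util.logging.Logger;

// Hypothetical helper, not part of the Dataverse sources.
public class UnfReport {

    private static final Logger logger = Logger.getLogger(UnfReport.class.getName());

    public static void logUnfs(DatasetVersion version) {
        List<String> unfValues = IngestUtil.getUnfValuesOfFiles(version);
        logger.info("Found " + unfValues.size() + " UNF value(s) among the tabular files.");
        for (String unf : unfValues) {
            logger.fine("UNF: " + unf);
        }
    }
}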

Aggregations

FileMetadata (edu.harvard.iq.dataverse.FileMetadata) 54
DataFile (edu.harvard.iq.dataverse.DataFile) 30
DatasetVersion (edu.harvard.iq.dataverse.DatasetVersion) 26
ArrayList (java.util.ArrayList) 23
Dataset (edu.harvard.iq.dataverse.Dataset) 18
Test (org.junit.Test) 13
Date (java.util.Date) 12
IOException (java.io.IOException) 10
Timestamp (java.sql.Timestamp) 10
DataTable (edu.harvard.iq.dataverse.DataTable) 5
DatasetField (edu.harvard.iq.dataverse.DatasetField) 5
AuthenticatedUser (edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser) 5
MocksFactory.makeDataset (edu.harvard.iq.dataverse.mocks.MocksFactory.makeDataset) 5
SimpleDateFormat (java.text.SimpleDateFormat) 5
HashMap (java.util.HashMap) 5
Dataverse (edu.harvard.iq.dataverse.Dataverse) 4
File (java.io.File) 4
FileNotFoundException (java.io.FileNotFoundException) 4
JsonObjectBuilder (javax.json.JsonObjectBuilder) 4
DataFileTag (edu.harvard.iq.dataverse.DataFileTag) 3