Search in sources :

Example 1 with ChecksumType

use of edu.harvard.iq.dataverse.DataFile.ChecksumType in project dataverse by IQSS.

In the class FileRecordWriter, the method createDataFile:

/**
 * Create a DataFile and corresponding FileMetadata for a file on the filesystem and add it to the
 * latest dataset version (if the user has AddDataset permissions for the dataset).
 * @param file file to create dataFile from
 * @return the saved DataFile, or null if no checksum manifest map is present in the job context
 */
private DataFile createDataFile(File file) {
    DatasetVersion version = dataset.getLatestVersion();
    String path = file.getAbsolutePath();
    // storage identifier = path relative to the dataset directory (authority/separator/identifier)
    String gid = dataset.getAuthority() + dataset.getDoiSeparator() + dataset.getIdentifier();
    String relativePath = path.substring(path.indexOf(gid) + gid.length() + 1);
    // we don't determine mime type
    DataFile datafile = new DataFile("application/octet-stream");
    datafile.setStorageIdentifier(relativePath);
    datafile.setFilesize(file.length());
    // single timestamp so create/modification/permission times are consistent
    Timestamp now = new Timestamp(new Date().getTime());
    datafile.setModificationTime(now);
    datafile.setCreateDate(now);
    datafile.setPermissionModificationTime(now);
    datafile.setOwner(dataset);
    datafile.setIngestDone();
    // check system property first, otherwise use the batch job property
    String jobChecksumType = System.getProperty("checksumType");
    if (jobChecksumType == null) {
        jobChecksumType = checksumType;
    }
    // initial default; overridden below if the job supplied a recognized type name
    datafile.setChecksumType(DataFile.ChecksumType.SHA1);
    if (jobChecksumType != null) { // guard: batch property may also be unset (avoids NPE)
        for (DataFile.ChecksumType type : DataFile.ChecksumType.values()) {
            if (jobChecksumType.equalsIgnoreCase(type.name())) {
                datafile.setChecksumType(type);
                break;
            }
        }
    }
    // lookup the checksum value in the job's manifest hashmap
    if (jobContext.getTransientUserData() == null) {
        getJobLogger().log(Level.SEVERE, "No checksum hashmap found in transientUserData");
        jobContext.setExitStatus("FAILED");
        return null;
    }
    @SuppressWarnings("unchecked") // transientUserData is populated as Map<String, String> by the job reader
    Map<String, String> manifest = (Map<String, String>) jobContext.getTransientUserData();
    String checksumVal = manifest.get(relativePath);
    if (checksumVal != null) {
        datafile.setChecksumValue(checksumVal);
        // remove the key, so we can check for unused checksums when the job is complete
        manifest.remove(relativePath);
    } else {
        datafile.setChecksumValue("Unknown");
        getJobLogger().log(Level.WARNING, "Unable to find checksum in manifest for: " + file.getAbsolutePath());
    }
    // set metadata and add to latest version
    FileMetadata fmd = new FileMetadata();
    fmd.setLabel(file.getName());
    // set the subdirectory if there is one
    if (relativePath.contains(File.separator)) {
        fmd.setDirectoryLabel(relativePath.replace(File.separator + file.getName(), ""));
    }
    fmd.setDataFile(datafile);
    datafile.getFileMetadatas().add(fmd);
    if (version.getFileMetadatas() == null)
        version.setFileMetadatas(new ArrayList<>());
    version.getFileMetadatas().add(fmd);
    fmd.setDatasetVersion(version);
    return dataFileServiceBean.save(datafile);
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) ChecksumType(edu.harvard.iq.dataverse.DataFile.ChecksumType) FileMetadata(edu.harvard.iq.dataverse.FileMetadata) ArrayList(java.util.ArrayList) DatasetVersion(edu.harvard.iq.dataverse.DatasetVersion) Timestamp(java.sql.Timestamp) HashMap(java.util.HashMap) Map(java.util.Map) Date(java.util.Date)

Example 2 with ChecksumType

use of edu.harvard.iq.dataverse.DataFile.ChecksumType in project dataverse by IQSS.

In the class FileRecordWriter, the method createPackageDataFile:

/**
 * Import the supplied batch of files as a single "package file" DataFile
 * (basically, a folder/directory, with the single associated DataFile/FileMetadata, etc.)
 * and add it to the latest dataset version.
 * @param files list of files, already copied to the dataset directory by rsync or otherwise.
 * @return the package DataFile, or null on any validation/checksum/rename failure
 *
 * Consider:
 * instead of expecting to have an extra top-level directory/folder to be
 * present already, generate it here (using the standard code used for generating
 * storage identifiers for "normal" files), create it as a directory, and move
 * all the supplied files there.
 */
private DataFile createPackageDataFile(List<File> files) {
    DataFile packageFile = new DataFile(DataFileServiceBean.MIME_TYPE_PACKAGE_FILE);
    FileUtil.generateStorageIdentifier(packageFile);
    String datasetDirectory = null;
    String folderName = null;
    // if the job supplied a total size, trust it; otherwise accumulate from the files below
    long totalSize = (suppliedSize != null) ? suppliedSize : 0L;
    String gid = dataset.getAuthority() + dataset.getDoiSeparator() + dataset.getIdentifier();
    // initial default; overridden below if the job supplied a recognized type name
    packageFile.setChecksumType(DataFile.ChecksumType.SHA1);
    // check system property first, otherwise use the batch job property:
    String jobChecksumType = System.getProperty("checksumType");
    if (jobChecksumType == null) {
        jobChecksumType = checksumType;
    }
    if (jobChecksumType != null) { // guard: batch property may also be unset (avoids NPE)
        for (DataFile.ChecksumType type : DataFile.ChecksumType.values()) {
            if (jobChecksumType.equalsIgnoreCase(type.name())) {
                packageFile.setChecksumType(type);
                break;
            }
        }
    }
    for (File file : files) {
        String path = file.getAbsolutePath();
        String relativePath = path.substring(path.indexOf(gid) + gid.length() + 1);
        // the folderName and datasetDirectory need to be initialized only once:
        if (datasetDirectory == null && folderName == null) {
            datasetDirectory = path.substring(0, path.indexOf(gid) + gid.length() + 1);
            // all files in a package must live inside a single top-level folder
            // (substring never returns null, so only the separator check is needed)
            if (relativePath.indexOf(File.separatorChar) > -1) {
                folderName = relativePath.substring(0, relativePath.indexOf(File.separatorChar));
            } else {
                getJobLogger().log(Level.SEVERE, "Invalid file package (files are not in a folder)");
                jobContext.setExitStatus("FAILED");
                return null;
            }
            if (!uploadFolder.equals(folderName)) {
                getJobLogger().log(Level.SEVERE, "Folder name mismatch: " + uploadFolder + " expected, " + folderName + " found.");
                jobContext.setExitStatus("FAILED");
                return null;
            }
        }
        if (suppliedSize == null) {
            totalSize += file.length();
        }
        // lookup (and consume) the checksum value in the job's manifest hashmap
        if (jobContext.getTransientUserData() == null) {
            getJobLogger().log(Level.SEVERE, "No checksum hashmap found in transientUserData");
            jobContext.setExitStatus("FAILED");
            return null;
        }
        @SuppressWarnings("unchecked") // transientUserData is populated as Map<String, String> by the job reader
        Map<String, String> manifest = (Map<String, String>) jobContext.getTransientUserData();
        String manifestPath = relativePath.substring(folderName.length() + 1);
        // remove the key, so we can check for unused checksums when the job is complete
        if (manifest.remove(manifestPath) == null) {
            getJobLogger().log(Level.WARNING, "Unable to find checksum in manifest for: " + file.getAbsolutePath());
        }
    }
    if (System.getProperty("checksumManifest") != null) {
        checksumManifest = System.getProperty("checksumManifest");
    }
    if (checksumManifest != null && !checksumManifest.isEmpty()) {
        String checksumManifestPath = datasetDirectory + File.separator + folderName + File.separator + checksumManifest;
        File checksumManifestFile = new File(checksumManifestPath);
        if (!checksumManifestFile.exists()) {
            getJobLogger().log(Level.WARNING, "Manifest file not found");
            // TODO:
            // add code to generate the manifest, if not present? -- L.A.
        } else {
            try {
                // the package checksum is the checksum of the manifest file itself
                packageFile.setChecksumValue(FileUtil.CalculateCheckSum(checksumManifestPath, packageFile.getChecksumType()));
            } catch (Exception ex) {
                getJobLogger().log(Level.SEVERE, "Failed to calculate checksum (type " + packageFile.getChecksumType() + ") " + ex.getMessage());
                jobContext.setExitStatus("FAILED");
                return null;
            }
        }
    } else {
        getJobLogger().log(Level.WARNING, "No checksumManifest property supplied");
    }
    // Move the folder to the final destination (renamed to the package's storage identifier):
    File sourceFolder = new File(datasetDirectory + File.separator + folderName);
    File destFolder = new File(datasetDirectory + File.separator + packageFile.getStorageIdentifier());
    if (!sourceFolder.renameTo(destFolder)) {
        getJobLogger().log(Level.SEVERE, "Could not move the file folder to the final destination (" + datasetDirectory + File.separator + packageFile.getStorageIdentifier() + ")");
        jobContext.setExitStatus("FAILED");
        return null;
    }
    packageFile.setFilesize(totalSize);
    // single timestamp so create/modification/permission times are consistent
    Timestamp now = new Timestamp(new Date().getTime());
    packageFile.setModificationTime(now);
    packageFile.setCreateDate(now);
    packageFile.setPermissionModificationTime(now);
    packageFile.setOwner(dataset);
    dataset.getFiles().add(packageFile);
    packageFile.setIngestDone();
    // set metadata and add to latest version
    FileMetadata fmd = new FileMetadata();
    // the package file is labeled with the folder name, not an individual file name
    fmd.setLabel(folderName);
    fmd.setDataFile(packageFile);
    packageFile.getFileMetadatas().add(fmd);
    if (dataset.getLatestVersion().getFileMetadatas() == null)
        dataset.getLatestVersion().setFileMetadatas(new ArrayList<>());
    dataset.getLatestVersion().getFileMetadatas().add(fmd);
    fmd.setDatasetVersion(dataset.getLatestVersion());
    getJobLogger().log(Level.INFO, "Successfully created a file of type package");
    return packageFile;
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) ChecksumType(edu.harvard.iq.dataverse.DataFile.ChecksumType) FileMetadata(edu.harvard.iq.dataverse.FileMetadata) ArrayList(java.util.ArrayList) DataFile(edu.harvard.iq.dataverse.DataFile) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map) Timestamp(java.sql.Timestamp) CommandException(edu.harvard.iq.dataverse.engine.command.exception.CommandException) IOException(java.io.IOException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) Date(java.util.Date)

Aggregations

DataFile (edu.harvard.iq.dataverse.DataFile)2 ChecksumType (edu.harvard.iq.dataverse.DataFile.ChecksumType)2 FileMetadata (edu.harvard.iq.dataverse.FileMetadata)2 Timestamp (java.sql.Timestamp)2 ArrayList (java.util.ArrayList)2 Date (java.util.Date)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 DatasetVersion (edu.harvard.iq.dataverse.DatasetVersion)1 CommandException (edu.harvard.iq.dataverse.engine.command.exception.CommandException)1 File (java.io.File)1 IOException (java.io.IOException)1 NoSuchAlgorithmException (java.security.NoSuchAlgorithmException)1