Use of edu.harvard.iq.dataverse.DataFile.ChecksumType in the Dataverse project by IQSS.
From the class FileRecordWriter, method createDataFile.
/**
 * Create a DataFile and corresponding FileMetadata for a file on the filesystem and add it to the
 * latest dataset version (if the user has AddDataset permissions for the dataset).
 * @param file file to create the DataFile from
 * @return the persisted DataFile, or null if no checksum manifest map is present in the
 * job's transient user data (in which case the job exit status is set to "FAILED")
 */
private DataFile createDataFile(File file) {
    DatasetVersion version = dataset.getLatestVersion();
    String path = file.getAbsolutePath();
    String gid = dataset.getAuthority() + dataset.getDoiSeparator() + dataset.getIdentifier();
    // storage identifier is the path relative to the dataset directory (authority/sep/identifier)
    String relativePath = path.substring(path.indexOf(gid) + gid.length() + 1);
    // we don't determine mime type
    DataFile datafile = new DataFile("application/octet-stream");
    datafile.setStorageIdentifier(relativePath);
    datafile.setFilesize(file.length());
    // use a single timestamp so creation/modification/permission times are identical
    Timestamp now = new Timestamp(new Date().getTime());
    datafile.setModificationTime(now);
    datafile.setCreateDate(now);
    datafile.setPermissionModificationTime(now);
    datafile.setOwner(dataset);
    datafile.setIngestDone();
    // check system property first, otherwise use the batch job property
    String jobChecksumType = (System.getProperty("checksumType") != null)
            ? System.getProperty("checksumType")
            : checksumType;
    // initial default; overridden below when the configured name matches a known algorithm
    datafile.setChecksumType(DataFile.ChecksumType.SHA1);
    boolean knownChecksumType = false;
    for (DataFile.ChecksumType type : DataFile.ChecksumType.values()) {
        if (jobChecksumType.equalsIgnoreCase(type.name())) {
            datafile.setChecksumType(type);
            knownChecksumType = true;
            break;
        }
    }
    if (!knownChecksumType) {
        // previously this fell back to SHA1 silently; make the misconfiguration visible
        getJobLogger().log(Level.WARNING,
                "Unrecognized checksum type '" + jobChecksumType + "'; defaulting to SHA1.");
    }
    // lookup the checksum value in the job's manifest hashmap
    if (jobContext.getTransientUserData() == null) {
        getJobLogger().log(Level.SEVERE, "No checksum hashmap found in transientUserData");
        jobContext.setExitStatus("FAILED");
        return null;
    }
    @SuppressWarnings("unchecked")
    Map<String, String> manifest = (Map<String, String>) jobContext.getTransientUserData();
    // remove the key (Map.remove returns the previous value), so we can check for
    // unused checksums when the job is complete
    String checksumVal = manifest.remove(relativePath);
    if (checksumVal != null) {
        datafile.setChecksumValue(checksumVal);
    } else {
        datafile.setChecksumValue("Unknown");
        getJobLogger().log(Level.WARNING, "Unable to find checksum in manifest for: " + file.getAbsolutePath());
    }
    // set metadata and add to latest version
    FileMetadata fmd = new FileMetadata();
    fmd.setLabel(file.getName());
    // set the subdirectory if there is one
    if (relativePath.contains(File.separator)) {
        fmd.setDirectoryLabel(relativePath.replace(File.separator + file.getName(), ""));
    }
    fmd.setDataFile(datafile);
    datafile.getFileMetadatas().add(fmd);
    if (version.getFileMetadatas() == null) {
        version.setFileMetadatas(new ArrayList<>());
    }
    version.getFileMetadatas().add(fmd);
    fmd.setDatasetVersion(version);
    return dataFileServiceBean.save(datafile);
}
Use of edu.harvard.iq.dataverse.DataFile.ChecksumType in the Dataverse project by IQSS.
From the class FileRecordWriter, method createPackageDataFile.
/**
 * Import the supplied batch of files as a single "package file" DataFile
 * (basically, a folder/directory, with the single associated DataFile/FileMetadata, etc.)
 * and add it to the
 * latest dataset version
 * @param files list of files, already copied to the dataset directory by rsync or otherwise.
 * @return datafile (the single "package" DataFile), or null on any failure
 * (folder-name validation, missing manifest map, checksum calculation, or folder rename),
 * in which case the batch job exit status is set to "FAILED"
 *
 * Consider:
 * instead of expecting to have an extra top-level directory/folder to be
 * present already, generate it here (using the standard code used for generating
 * storage identifiers for "normal" files), create it as a directory, and move
 * all the supplied files there.
 */
private DataFile createPackageDataFile(List<File> files) {
// the whole package is represented by a single DataFile with a special package MIME type
DataFile packageFile = new DataFile(DataFileServiceBean.MIME_TYPE_PACKAGE_FILE);
FileUtil.generateStorageIdentifier(packageFile);
String datasetDirectory = null;
String folderName = null;
long totalSize;
// if a total size was supplied to the job, trust it; otherwise sum file sizes in the loop below
if (suppliedSize != null) {
totalSize = suppliedSize;
} else {
totalSize = 0L;
}
// global-id fragment used to locate the dataset directory inside each file's absolute path
String gid = dataset.getAuthority() + dataset.getDoiSeparator() + dataset.getIdentifier();
// initial default
packageFile.setChecksumType(DataFile.ChecksumType.SHA1);
// check system property first, otherwise use the batch job property:
String jobChecksumType;
if (System.getProperty("checksumType") != null) {
jobChecksumType = System.getProperty("checksumType");
} else {
jobChecksumType = checksumType;
}
// map the configured name onto a known ChecksumType (case-insensitive);
// if nothing matches, the SHA1 default set above remains in effect
for (DataFile.ChecksumType type : DataFile.ChecksumType.values()) {
if (jobChecksumType.equalsIgnoreCase(type.name())) {
packageFile.setChecksumType(type);
break;
}
}
for (File file : files) {
String path = file.getAbsolutePath();
// path of this file relative to the dataset directory
String relativePath = path.substring(path.indexOf(gid) + gid.length() + 1);
// the folderName and datasetDirectory need to be initialized only once:
if (datasetDirectory == null && folderName == null) {
datasetDirectory = path.substring(0, path.indexOf(gid) + gid.length() + 1);
// all package files must live under a single top-level folder within the dataset directory
if (relativePath != null && relativePath.indexOf(File.separatorChar) > -1) {
folderName = relativePath.substring(0, relativePath.indexOf(File.separatorChar));
} else {
getJobLogger().log(Level.SEVERE, "Invalid file package (files are not in a folder)");
jobContext.setExitStatus("FAILED");
return null;
}
// that folder must be the upload folder the job was started with
if (!uploadFolder.equals(folderName)) {
getJobLogger().log(Level.SEVERE, "Folder name mismatch: " + uploadFolder + " expected, " + folderName + " found.");
jobContext.setExitStatus("FAILED");
return null;
}
}
if (suppliedSize == null) {
totalSize += file.length();
}
String checksumValue;
// lookup the checksum value in the job's manifest hashmap
if (jobContext.getTransientUserData() != null) {
// manifest entries are keyed by the path relative to the package folder itself
String manifestPath = relativePath.substring(folderName.length() + 1);
checksumValue = ((Map<String, String>) jobContext.getTransientUserData()).get(manifestPath);
if (checksumValue != null) {
// remove the key, so we can check for unused checksums when the job is complete
((Map<String, String>) jobContext.getTransientUserData()).remove(manifestPath);
} else {
getJobLogger().log(Level.WARNING, "Unable to find checksum in manifest for: " + file.getAbsolutePath());
}
} else {
getJobLogger().log(Level.SEVERE, "No checksum hashmap found in transientUserData");
jobContext.setExitStatus("FAILED");
return null;
}
}
// a system property overrides the job-level checksumManifest setting
if (System.getProperty("checksumManifest") != null) {
checksumManifest = System.getProperty("checksumManifest");
}
File checksumManifestFile = null;
if (checksumManifest != null && !checksumManifest.isEmpty()) {
String checksumManifestPath = datasetDirectory + File.separator + folderName + File.separator + checksumManifest;
checksumManifestFile = new File(checksumManifestPath);
if (!checksumManifestFile.exists()) {
// a missing manifest is only a warning: the package is still created, without a checksum value
getJobLogger().log(Level.WARNING, "Manifest file not found");
// TODO:
// add code to generate the manifest, if not present? -- L.A.
} else {
try {
// the package DataFile's checksum is computed over the manifest file itself
packageFile.setChecksumValue(FileUtil.CalculateCheckSum(checksumManifestPath, packageFile.getChecksumType()));
} catch (Exception ex) {
getJobLogger().log(Level.SEVERE, "Failed to calculate checksum (type " + packageFile.getChecksumType() + ") " + ex.getMessage());
jobContext.setExitStatus("FAILED");
return null;
}
}
} else {
getJobLogger().log(Level.WARNING, "No checksumManifest property supplied");
}
// Move the folder to the final destination:
// the upload folder is renamed to the package file's generated storage identifier
if (!(new File(datasetDirectory + File.separator + folderName).renameTo(new File(datasetDirectory + File.separator + packageFile.getStorageIdentifier())))) {
getJobLogger().log(Level.SEVERE, "Could not move the file folder to the final destination (" + datasetDirectory + File.separator + packageFile.getStorageIdentifier() + ")");
jobContext.setExitStatus("FAILED");
return null;
}
packageFile.setFilesize(totalSize);
packageFile.setModificationTime(new Timestamp(new Date().getTime()));
packageFile.setCreateDate(new Timestamp(new Date().getTime()));
packageFile.setPermissionModificationTime(new Timestamp(new Date().getTime()));
packageFile.setOwner(dataset);
dataset.getFiles().add(packageFile);
packageFile.setIngestDone();
// set metadata and add to latest version
FileMetadata fmd = new FileMetadata();
// the user-visible label is the original upload folder name, not the storage identifier
fmd.setLabel(folderName);
fmd.setDataFile(packageFile);
packageFile.getFileMetadatas().add(fmd);
if (dataset.getLatestVersion().getFileMetadatas() == null)
dataset.getLatestVersion().setFileMetadatas(new ArrayList<>());
dataset.getLatestVersion().getFileMetadatas().add(fmd);
fmd.setDatasetVersion(dataset.getLatestVersion());
getJobLogger().log(Level.INFO, "Successfully created a file of type package");
return packageFile;
}
Aggregations