Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.
From the class FileRecordWriter, method createPackageDataFile:
/**
 * Import the supplied batch of files as a single "package file" DataFile
 * (basically, a folder/directory, with the single associated DataFile/FileMetadata, etc.)
 * and add it to the latest dataset version.
 *
 * @param files list of files, already copied to the dataset directory by rsync or otherwise;
 *              all of them are expected to live under a single top-level folder inside the
 *              dataset directory (the folder name must match {@code uploadFolder}).
 * @return the newly created package DataFile, or null on any validation/IO failure
 *         (in which case the batch job exit status is set to "FAILED")
 *
 * Consider:
 * instead of expecting to have an extra top-level directory/folder to be
 * present already, generate it here (using the standard code used for generating
 * storage identifiers for "normal" files), create it as a directory, and move
 * all the supplied files there.
 */
private DataFile createPackageDataFile(List<File> files) {
// One DataFile of the special "package" MIME type stands in for the whole folder.
DataFile packageFile = new DataFile(DataFileServiceBean.MIME_TYPE_PACKAGE_FILE);
FileUtil.generateStorageIdentifier(packageFile);
String datasetDirectory = null;
String folderName = null;
// Total byte size: trust the job-supplied size when present; otherwise sum the
// individual file lengths in the loop below.
long totalSize;
if (suppliedSize != null) {
totalSize = suppliedSize;
} else {
totalSize = 0L;
}
// Global-id fragment (authority + separator + identifier); used below to split each
// file's absolute path into the dataset directory and the path relative to it.
String gid = dataset.getAuthority() + dataset.getDoiSeparator() + dataset.getIdentifier();
// initial default
packageFile.setChecksumType(DataFile.ChecksumType.SHA1);
// check system property first, otherwise use the batch job property:
String jobChecksumType;
if (System.getProperty("checksumType") != null) {
jobChecksumType = System.getProperty("checksumType");
} else {
jobChecksumType = checksumType;
}
// Map the (case-insensitive) type name onto the enum; unknown names silently keep the
// SHA1 default set above.
for (DataFile.ChecksumType type : DataFile.ChecksumType.values()) {
if (jobChecksumType.equalsIgnoreCase(type.name())) {
packageFile.setChecksumType(type);
break;
}
}
for (File file : files) {
String path = file.getAbsolutePath();
// Path relative to the dataset directory, i.e. everything after ".../<gid>/".
// NOTE(review): assumes every path contains gid — indexOf() returning -1 would
// silently produce a wrong substring; verify against the caller's contract.
String relativePath = path.substring(path.indexOf(gid) + gid.length() + 1);
// the folderName and datasetDirectory need to be initialized only once:
if (datasetDirectory == null && folderName == null) {
datasetDirectory = path.substring(0, path.indexOf(gid) + gid.length() + 1);
if (relativePath != null && relativePath.indexOf(File.separatorChar) > -1) {
folderName = relativePath.substring(0, relativePath.indexOf(File.separatorChar));
} else {
// A file sitting directly in the dataset directory means there is no
// package folder at all — hard failure for the whole batch.
getJobLogger().log(Level.SEVERE, "Invalid file package (files are not in a folder)");
jobContext.setExitStatus("FAILED");
return null;
}
if (!uploadFolder.equals(folderName)) {
getJobLogger().log(Level.SEVERE, "Folder name mismatch: " + uploadFolder + " expected, " + folderName + " found.");
jobContext.setExitStatus("FAILED");
return null;
}
}
if (suppliedSize == null) {
totalSize += file.length();
}
String checksumValue;
// lookup the checksum value in the job's manifest hashmap
if (jobContext.getTransientUserData() != null) {
// Manifest keys are paths relative to the package folder (strip "<folder>/").
String manifestPath = relativePath.substring(folderName.length() + 1);
checksumValue = ((Map<String, String>) jobContext.getTransientUserData()).get(manifestPath);
if (checksumValue != null) {
// remove the key, so we can check for unused checksums when the job is complete
((Map<String, String>) jobContext.getTransientUserData()).remove(manifestPath);
} else {
// Missing entry is only a warning; the per-file checksum is not stored on the
// package file anyway (only the manifest-file checksum is, below).
getJobLogger().log(Level.WARNING, "Unable to find checksum in manifest for: " + file.getAbsolutePath());
}
} else {
getJobLogger().log(Level.SEVERE, "No checksum hashmap found in transientUserData");
jobContext.setExitStatus("FAILED");
return null;
}
}
// A system property may override the job-configured manifest file name.
if (System.getProperty("checksumManifest") != null) {
checksumManifest = System.getProperty("checksumManifest");
}
File checksumManifestFile = null;
if (checksumManifest != null && !checksumManifest.isEmpty()) {
String checksumManifestPath = datasetDirectory + File.separator + folderName + File.separator + checksumManifest;
checksumManifestFile = new File(checksumManifestPath);
if (!checksumManifestFile.exists()) {
getJobLogger().log(Level.WARNING, "Manifest file not found");
// TODO:
// add code to generate the manifest, if not present? -- L.A.
} else {
try {
// The package file's recorded checksum is the checksum OF THE MANIFEST,
// not of the data files themselves.
packageFile.setChecksumValue(FileUtil.CalculateCheckSum(checksumManifestPath, packageFile.getChecksumType()));
} catch (Exception ex) {
getJobLogger().log(Level.SEVERE, "Failed to calculate checksum (type " + packageFile.getChecksumType() + ") " + ex.getMessage());
jobContext.setExitStatus("FAILED");
return null;
}
}
} else {
getJobLogger().log(Level.WARNING, "No checksumManifest property supplied");
}
// Move the folder to the final destination:
// (renames the upload folder to the package file's generated storage identifier)
if (!(new File(datasetDirectory + File.separator + folderName).renameTo(new File(datasetDirectory + File.separator + packageFile.getStorageIdentifier())))) {
getJobLogger().log(Level.SEVERE, "Could not move the file folder to the final destination (" + datasetDirectory + File.separator + packageFile.getStorageIdentifier() + ")");
jobContext.setExitStatus("FAILED");
return null;
}
packageFile.setFilesize(totalSize);
packageFile.setModificationTime(new Timestamp(new Date().getTime()));
packageFile.setCreateDate(new Timestamp(new Date().getTime()));
packageFile.setPermissionModificationTime(new Timestamp(new Date().getTime()));
packageFile.setOwner(dataset);
dataset.getFiles().add(packageFile);
packageFile.setIngestDone();
// set metadata and add to latest version
// The user-visible label of the package file is the original upload folder name.
FileMetadata fmd = new FileMetadata();
fmd.setLabel(folderName);
fmd.setDataFile(packageFile);
packageFile.getFileMetadatas().add(fmd);
if (dataset.getLatestVersion().getFileMetadatas() == null)
dataset.getLatestVersion().setFileMetadatas(new ArrayList<>());
dataset.getLatestVersion().getFileMetadatas().add(fmd);
fmd.setDatasetVersion(dataset.getLatestVersion());
getJobLogger().log(Level.INFO, "Successfully created a file of type package");
return packageFile;
}
Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.
From the class DatasetUtilTest, method testGetThumbnailRestricted:
@Test
public void testGetThumbnailRestricted() {
    System.out.println("testGetThumbnailRestricted");
    // A restricted file must never be served as a dataset thumbnail.
    DataFile restrictedFile = new DataFile();
    restrictedFile.setId(42L);
    restrictedFile.setRestricted(true);
    Dataset datasetWithRestrictedThumbnail = new Dataset();
    datasetWithRestrictedThumbnail.setThumbnailFile(restrictedFile);
    // Expect no thumbnail at all for such a dataset.
    assertNull(DatasetUtil.getThumbnail(datasetWithRestrictedThumbnail));
}
Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.
From the class FileAccessIOTest, method setUpClass:
@Before
public void setUpClass() throws IOException {
    // Build the mock object hierarchy: dataverse owns dataset owns datafile,
    // with storage identifiers matching the on-disk fixtures created below.
    dataverse = MocksFactory.makeDataverse();
    dataset = MocksFactory.makeDataset();
    dataset.setOwner(dataverse);
    dataset.setAuthority("tmp");
    dataset.setIdentifier("dataset");
    dataset.setStorageIdentifier("Dataset");

    dataFile = MocksFactory.makeDataFile();
    dataFile.setOwner(dataset);
    dataFile.setStorageIdentifier("DataFile");

    // One access object per DvObject level under test.
    datasetAccess = new FileAccessIO<>(dataset);
    dataFileAccess = new FileAccessIO<>(dataFile);
    dataverseAccess = new FileAccessIO<>(dataverse);

    // Create the backing files under /tmp that FileAccessIO will resolve to.
    File datasetStorageFile = new File("/tmp/files/tmp/dataset/Dataset");
    datasetStorageFile.getParentFile().mkdirs();
    datasetStorageFile.createNewFile();
    new File("/tmp/files/tmp/dataset/DataFile").createNewFile();
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(datasetStorageFile))) {
        writer.write("This is a test string");
    }
}
Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.
From the class IngestUtilTest, method testRecalculateDatasetVersionUNF:
@Test
public void testRecalculateDatasetVersionUNF() {
    // A null version must be tolerated without throwing.
    IngestUtil.recalculateDatasetVersionUNF(null);

    // A version with no files gets no UNF.
    DatasetVersion dsvNoFile = new DatasetVersion();
    IngestUtil.recalculateDatasetVersionUNF(dsvNoFile);
    assertNull(dsvNoFile.getUNF());

    // A version with a single tabular file: the version UNF is derived from
    // the file-level UNF stored on the DataTable.
    Dataset dataset = new Dataset();
    dataset.setProtocol("doi");
    dataset.setAuthority("fakeAuthority");
    dataset.setIdentifier("12345");
    DatasetVersion dsv1 = new DatasetVersion();
    dsv1.setDataset(dataset);
    dsv1.setId(42L);
    dsv1.setVersionState(DatasetVersion.VersionState.DRAFT);
    List<DatasetVersion> datasetVersions = new ArrayList<>();
    datasetVersions.add(dsv1);
    DataFile datafile1 = new DataFile("application/octet-stream");
    DataTable dataTable = new DataTable();
    dataTable.setUnf("unfOnDataTable");
    datafile1.setDataTable(dataTable);
    // Attaching a DataTable is what makes a DataFile "tabular".
    assertTrue(datafile1.isTabularData());
    FileMetadata fmd1 = new FileMetadata();
    fmd1.setId(1L);
    fmd1.setLabel("datafile1.txt");
    fmd1.setDataFile(datafile1);
    datafile1.getFileMetadatas().add(fmd1);
    dsv1.getFileMetadatas().add(fmd1);
    fmd1.setDatasetVersion(dsv1);
    dataset.setVersions(datasetVersions);

    assertNull(dsv1.getUNF());
    IngestUtil.recalculateDatasetVersionUNF(dsv1);
    assertEquals("UNF:6:rDlgOhoEkEQQdwtLRHjmtw==", dsv1.getUNF());
}
Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.
From the class IngestUtilTest, method testCheckForDuplicateFileNamesWithDirectories:
/**
 * Test adding duplicate file name labels with directories, including a
 * duplicate file name label in another directory.
 */
@Test
public void testCheckForDuplicateFileNamesWithDirectories() throws Exception {
    SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd");

    // Create a released dataset version to attach files to.
    Dataset dataset = makeDataset();
    DatasetVersion datasetVersion = dataset.getEditVersion();
    datasetVersion.setCreateTime(dateFmt.parse("20001012"));
    datasetVersion.setLastUpdateTime(datasetVersion.getLastUpdateTime());
    datasetVersion.setId(MocksFactory.nextId());
    datasetVersion.setReleaseTime(dateFmt.parse("20010101"));
    datasetVersion.setVersionState(DatasetVersion.VersionState.RELEASED);
    datasetVersion.setMinorVersionNumber(0L);
    datasetVersion.setVersionNumber(1L);
    datasetVersion.setFileMetadatas(new ArrayList<>());

    // Two files in "subdir" (already attached to the version) and a third file
    // whose label duplicates the second but lives in the dataset root.
    List<DataFile> dataFileList = new ArrayList<>();

    DataFile datafile1 = buildOctetStreamDataFile(dataset, "subdir/datafile1.txt");
    FileMetadata fmd1 = attachFileMetadata(datafile1, 1L, "datafile1.txt", "subdir");
    datasetVersion.getFileMetadatas().add(fmd1);
    fmd1.setDatasetVersion(datasetVersion);
    dataFileList.add(datafile1);

    DataFile datafile2 = buildOctetStreamDataFile(dataset, "subdir/datafile2.txt");
    FileMetadata fmd2 = attachFileMetadata(datafile2, 2L, "datafile2.txt", "subdir");
    datasetVersion.getFileMetadatas().add(fmd2);
    fmd2.setDatasetVersion(datasetVersion);
    dataFileList.add(datafile2);

    // Not yet attached to the version: same label as datafile2 but no directory.
    DataFile datafile3 = buildOctetStreamDataFile(dataset, "datafile2.txt");
    FileMetadata fmd3 = attachFileMetadata(datafile3, 3L, "datafile2.txt", null);
    dataFileList.add(datafile3);

    IngestUtil.checkForDuplicateFileNamesFinal(datasetVersion, dataFileList);

    // Round one: the files already in the version collide and get "-1" suffixes;
    // the root-level file keeps its original label.
    boolean file1NameAltered = false;
    boolean file2NameAltered = false;
    boolean file3NameAltered = false;
    for (DataFile df : dataFileList) {
        if (df.getFileMetadata().getLabel().equals("datafile1-1.txt")) {
            file1NameAltered = true;
        }
        if (df.getFileMetadata().getLabel().equals("datafile2-1.txt")) {
            file2NameAltered = true;
        }
        if (df.getFileMetadata().getLabel().equals("datafile2.txt")) {
            file3NameAltered = false;
        }
    }
    assertTrue(file1NameAltered);
    assertTrue(file2NameAltered);
    assertFalse(file3NameAltered);

    // Add the duplicate file in the root to the version as well.
    datasetVersion.getFileMetadatas().add(fmd3);
    fmd3.setDatasetVersion(datasetVersion);

    // Round two: the "-1" names now collide too and must be incremented to "-2".
    // Reset the flags first — otherwise the assertions below would pass vacuously
    // because file1NameAltered/file2NameAltered are already true from round one.
    file1NameAltered = false;
    file2NameAltered = false;
    file3NameAltered = false;
    IngestUtil.checkForDuplicateFileNamesFinal(datasetVersion, dataFileList);
    for (DataFile df : dataFileList) {
        if (df.getFileMetadata().getLabel().equals("datafile1-2.txt")) {
            file1NameAltered = true;
        }
        if (df.getFileMetadata().getLabel().equals("datafile2-2.txt")) {
            file2NameAltered = true;
        }
        if (df.getFileMetadata().getLabel().equals("datafile2-1.txt")) {
            file3NameAltered = true;
        }
    }
    assertTrue(file1NameAltered);
    assertTrue(file2NameAltered);
    assertTrue(file3NameAltered);
}

/** Builds a fully-populated "application/octet-stream" DataFile owned by the given dataset. */
private static DataFile buildOctetStreamDataFile(Dataset owner, String storageIdentifier) {
    DataFile dataFile = new DataFile("application/octet-stream");
    dataFile.setStorageIdentifier(storageIdentifier);
    dataFile.setFilesize(200);
    Timestamp now = new Timestamp(new Date().getTime());
    dataFile.setModificationTime(now);
    dataFile.setCreateDate(now);
    dataFile.setPermissionModificationTime(now);
    dataFile.setOwner(owner);
    dataFile.setIngestDone();
    dataFile.setChecksumType(DataFile.ChecksumType.SHA1);
    dataFile.setChecksumValue("Unknown");
    return dataFile;
}

/** Creates a FileMetadata with the given id/label (and optional directory) and links it to the file. */
private static FileMetadata attachFileMetadata(DataFile dataFile, Long id, String label, String directoryLabel) {
    FileMetadata fmd = new FileMetadata();
    fmd.setId(id);
    fmd.setLabel(label);
    if (directoryLabel != null) {
        fmd.setDirectoryLabel(directoryLabel);
    }
    fmd.setDataFile(dataFile);
    dataFile.getFileMetadatas().add(fmd);
    return fmd;
}
Aggregations