Search in sources :

Example 91 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class FileRecordWriter method createPackageDataFile.

/**
 * Import the supplied batch of files as a single "package file" DataFile
 * (basically, a folder/directory, with the single associated DataFile/FileMetadata, etc.)
 * and add it to the
 * latest dataset version
 * @param files list of files, already copied to the dataset directory by rsync or otherwise.
 * @return datafile
 *
 * Consider:
 * instead of expecting to have an extra top-level directory/folder to be
 * present already, generate it here (using the standard code used for generating
 * storage identifiers for "normal" files), create it as a directory, and move
 * all the supplied files there.
 */
private DataFile createPackageDataFile(List<File> files) {
    // A "package file" is a single DataFile standing in for the whole uploaded folder.
    DataFile packageFile = new DataFile(DataFileServiceBean.MIME_TYPE_PACKAGE_FILE);
    FileUtil.generateStorageIdentifier(packageFile);
    String datasetDirectory = null;
    String folderName = null;
    // Total size: trust the batch-supplied size when present, otherwise sum
    // the individual file lengths in the loop below.
    long totalSize;
    if (suppliedSize != null) {
        totalSize = suppliedSize;
    } else {
        totalSize = 0L;
    }
    // Global-id fragment (authority + separator + identifier) used to split each
    // absolute file path into dataset directory and relative path.
    String gid = dataset.getAuthority() + dataset.getDoiSeparator() + dataset.getIdentifier();
    // initial default
    packageFile.setChecksumType(DataFile.ChecksumType.SHA1);
    // check system property first, otherwise use the batch job property:
    String jobChecksumType;
    if (System.getProperty("checksumType") != null) {
        jobChecksumType = System.getProperty("checksumType");
    } else {
        jobChecksumType = checksumType;
    }
    // Map the name (case-insensitively) onto a known ChecksumType; an unknown
    // name silently keeps the SHA1 default set above.
    for (DataFile.ChecksumType type : DataFile.ChecksumType.values()) {
        if (jobChecksumType.equalsIgnoreCase(type.name())) {
            packageFile.setChecksumType(type);
            break;
        }
    }
    for (File file : files) {
        String path = file.getAbsolutePath();
        // NOTE(review): assumes gid occurs in every absolute path; indexOf()
        // returning -1 would produce a bogus substring index — confirm the
        // rsync/upload layout guarantees this.
        String relativePath = path.substring(path.indexOf(gid) + gid.length() + 1);
        // the folderName and datasetDirectory need to be initialized only once:
        if (datasetDirectory == null && folderName == null) {
            datasetDirectory = path.substring(0, path.indexOf(gid) + gid.length() + 1);
            if (relativePath != null && relativePath.indexOf(File.separatorChar) > -1) {
                folderName = relativePath.substring(0, relativePath.indexOf(File.separatorChar));
            } else {
                // Package files must arrive inside a single top-level folder.
                getJobLogger().log(Level.SEVERE, "Invalid file package (files are not in a folder)");
                jobContext.setExitStatus("FAILED");
                return null;
            }
            if (!uploadFolder.equals(folderName)) {
                getJobLogger().log(Level.SEVERE, "Folder name mismatch: " + uploadFolder + " expected, " + folderName + " found.");
                jobContext.setExitStatus("FAILED");
                return null;
            }
        }
        if (suppliedSize == null) {
            totalSize += file.length();
        }
        String checksumValue;
        // lookup the checksum value in the job's manifest hashmap
        if (jobContext.getTransientUserData() != null) {
            String manifestPath = relativePath.substring(folderName.length() + 1);
            checksumValue = ((Map<String, String>) jobContext.getTransientUserData()).get(manifestPath);
            if (checksumValue != null) {
                // remove the key, so we can check for unused checksums when the job is complete
                ((Map<String, String>) jobContext.getTransientUserData()).remove(manifestPath);
            } else {
                getJobLogger().log(Level.WARNING, "Unable to find checksum in manifest for: " + file.getAbsolutePath());
            }
            // Note: checksumValue is looked up only to verify presence and prune
            // the manifest map; per-file checksums are not stored on the package file.
        } else {
            getJobLogger().log(Level.SEVERE, "No checksum hashmap found in transientUserData");
            jobContext.setExitStatus("FAILED");
            return null;
        }
    }
    // An explicitly-set system property overrides the job's manifest file name.
    if (System.getProperty("checksumManifest") != null) {
        checksumManifest = System.getProperty("checksumManifest");
    }
    File checksumManifestFile = null;
    if (checksumManifest != null && !checksumManifest.isEmpty()) {
        String checksumManifestPath = datasetDirectory + File.separator + folderName + File.separator + checksumManifest;
        checksumManifestFile = new File(checksumManifestPath);
        if (!checksumManifestFile.exists()) {
            getJobLogger().log(Level.WARNING, "Manifest file not found");
        // TODO:
        // add code to generate the manifest, if not present? -- L.A.
        } else {
            try {
                // The package file's checksum is computed over the manifest file
                // itself, not over the individual data files.
                packageFile.setChecksumValue(FileUtil.CalculateCheckSum(checksumManifestPath, packageFile.getChecksumType()));
            } catch (Exception ex) {
                getJobLogger().log(Level.SEVERE, "Failed to calculate checksum (type " + packageFile.getChecksumType() + ") " + ex.getMessage());
                jobContext.setExitStatus("FAILED");
                return null;
            }
        }
    } else {
        getJobLogger().log(Level.WARNING, "No checksumManifest property supplied");
    }
    // Move the folder to the final destination:
    // (renames the upload folder to the package file's generated storage identifier)
    if (!(new File(datasetDirectory + File.separator + folderName).renameTo(new File(datasetDirectory + File.separator + packageFile.getStorageIdentifier())))) {
        getJobLogger().log(Level.SEVERE, "Could not move the file folder to the final destination (" + datasetDirectory + File.separator + packageFile.getStorageIdentifier() + ")");
        jobContext.setExitStatus("FAILED");
        return null;
    }
    packageFile.setFilesize(totalSize);
    packageFile.setModificationTime(new Timestamp(new Date().getTime()));
    packageFile.setCreateDate(new Timestamp(new Date().getTime()));
    packageFile.setPermissionModificationTime(new Timestamp(new Date().getTime()));
    packageFile.setOwner(dataset);
    dataset.getFiles().add(packageFile);
    packageFile.setIngestDone();
    // set metadata and add to latest version
    FileMetadata fmd = new FileMetadata();
    // The upload folder's name doubles as the user-visible file label.
    fmd.setLabel(folderName);
    fmd.setDataFile(packageFile);
    packageFile.getFileMetadatas().add(fmd);
    if (dataset.getLatestVersion().getFileMetadatas() == null)
        dataset.getLatestVersion().setFileMetadatas(new ArrayList<>());
    dataset.getLatestVersion().getFileMetadatas().add(fmd);
    fmd.setDatasetVersion(dataset.getLatestVersion());
    getJobLogger().log(Level.INFO, "Successfully created a file of type package");
    return packageFile;
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) ChecksumType(edu.harvard.iq.dataverse.DataFile.ChecksumType) FileMetadata(edu.harvard.iq.dataverse.FileMetadata) ArrayList(java.util.ArrayList) DataFile(edu.harvard.iq.dataverse.DataFile) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map) Timestamp(java.sql.Timestamp) CommandException(edu.harvard.iq.dataverse.engine.command.exception.CommandException) IOException(java.io.IOException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) Date(java.util.Date)

Example 92 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class DatasetUtilTest method testGetThumbnailRestricted.

/**
 * A restricted file must never be served as a dataset thumbnail:
 * getThumbnail() is expected to return null when the designated
 * thumbnail file is restricted.
 */
@Test
public void testGetThumbnailRestricted() {
    System.out.println("testGetThumbnailRestricted");
    Dataset dataset = new Dataset();
    DataFile thumbnailFile = new DataFile();
    // Uppercase 'L' suffix: lowercase 'l' is easily misread as the digit '1'.
    thumbnailFile.setId(42L);
    thumbnailFile.setRestricted(true);
    dataset.setThumbnailFile(thumbnailFile);
    DatasetThumbnail result = DatasetUtil.getThumbnail(dataset);
    assertNull(result);
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) Dataset(edu.harvard.iq.dataverse.Dataset) Test(org.junit.Test)

Example 93 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class FileAccessIOTest method setUpClass.

/**
 * Builds a mock dataverse/dataset/datafile hierarchy rooted at
 * /tmp/files and creates the physical files backing the storage
 * identifiers, so the FileAccessIO objects under test have real
 * paths to resolve.
 */
@Before
public void setUpClass() throws IOException {
    dataverse = MocksFactory.makeDataverse();
    dataset = MocksFactory.makeDataset();
    dataset.setOwner(dataverse);
    dataset.setAuthority("tmp");
    dataset.setIdentifier("dataset");
    dataset.setStorageIdentifier("Dataset");
    dataFile = MocksFactory.makeDataFile();
    dataFile.setOwner(dataset);
    dataFile.setStorageIdentifier("DataFile");
    // One access object per DvObject flavor.
    datasetAccess = new FileAccessIO<>(dataset);
    dataFileAccess = new FileAccessIO<>(dataFile);
    dataverseAccess = new FileAccessIO<>(dataverse);
    // Physical layout: <files root>/<authority>/<identifier>/<storage id>
    String storageDir = "/tmp/files/tmp/dataset";
    File datasetFile = new File(storageDir, "Dataset");
    datasetFile.getParentFile().mkdirs();
    datasetFile.createNewFile();
    new File(storageDir, "DataFile").createNewFile();
    // Seed the dataset's backing file with known content for read tests.
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(datasetFile))) {
        writer.write("This is a test string");
    }
}
Also used : FileWriter(java.io.FileWriter) DataFile(edu.harvard.iq.dataverse.DataFile) File(java.io.File) BufferedWriter(java.io.BufferedWriter) Before(org.junit.Before)

Example 94 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class IngestUtilTest method testRecalculateDatasetVersionUNF.

/**
 * recalculateDatasetVersionUNF must tolerate null and file-less versions
 * (leaving the UNF null), and must derive a version-level UNF from the
 * per-file UNFs of the version's tabular files.
 */
@Test
public void testRecalculateDatasetVersionUNF() {
    // Null and empty versions are handled gracefully: no exception, UNF stays null.
    IngestUtil.recalculateDatasetVersionUNF(null);
    DatasetVersion dsvNoFile = new DatasetVersion();
    IngestUtil.recalculateDatasetVersionUNF(dsvNoFile);
    assertEquals(null, dsvNoFile.getUNF());
    // Build a draft version containing a single tabular file with a known UNF.
    Dataset dataset = new Dataset();
    dataset.setProtocol("doi");
    dataset.setAuthority("fakeAuthority");
    dataset.setIdentifier("12345");
    DatasetVersion dsv1 = new DatasetVersion();
    dsv1.setDataset(dataset);
    // Uppercase 'L' suffix: lowercase 'l' is easily misread as the digit '1'.
    dsv1.setId(42L);
    dsv1.setVersionState(DatasetVersion.VersionState.DRAFT);
    List<DatasetVersion> datasetVersions = new ArrayList<>();
    datasetVersions.add(dsv1);
    DataFile datafile1 = new DataFile("application/octet-stream");
    DataTable dataTable = new DataTable();
    dataTable.setUnf("unfOnDataTable");
    datafile1.setDataTable(dataTable);
    // Attaching a DataTable is what makes the file count as tabular data.
    assertEquals(true, datafile1.isTabularData());
    FileMetadata fmd1 = new FileMetadata();
    fmd1.setId(1L);
    fmd1.setLabel("datafile1.txt");
    fmd1.setDataFile(datafile1);
    datafile1.getFileMetadatas().add(fmd1);
    dsv1.getFileMetadatas().add(fmd1);
    fmd1.setDatasetVersion(dsv1);
    dataset.setVersions(datasetVersions);
    // Before recalculation the version has no UNF; afterwards it is derived
    // from the file-level UNF set above.
    assertEquals(null, dsv1.getUNF());
    IngestUtil.recalculateDatasetVersionUNF(dsv1);
    assertEquals("UNF:6:rDlgOhoEkEQQdwtLRHjmtw==", dsv1.getUNF());
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) DataTable(edu.harvard.iq.dataverse.DataTable) Dataset(edu.harvard.iq.dataverse.Dataset) MocksFactory.makeDataset(edu.harvard.iq.dataverse.mocks.MocksFactory.makeDataset) ArrayList(java.util.ArrayList) FileMetadata(edu.harvard.iq.dataverse.FileMetadata) DatasetVersion(edu.harvard.iq.dataverse.DatasetVersion) Test(org.junit.Test)

Example 95 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class IngestUtilTest method testCheckForDuplicateFileNamesWithDirectories.

/**
 * Test adding duplicate file name labels with directories, including a
 * duplicate file name label in another directory.
 */
@Test
public void testCheckForDuplicateFileNamesWithDirectories() throws Exception {
    SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd");
    // create dataset and a released version to attach the files to
    Dataset dataset = makeDataset();
    DatasetVersion datasetVersion = dataset.getEditVersion();
    datasetVersion.setCreateTime(dateFmt.parse("20001012"));
    datasetVersion.setLastUpdateTime(datasetVersion.getLastUpdateTime());
    datasetVersion.setId(MocksFactory.nextId());
    datasetVersion.setReleaseTime(dateFmt.parse("20010101"));
    datasetVersion.setVersionState(DatasetVersion.VersionState.RELEASED);
    datasetVersion.setMinorVersionNumber(0L);
    datasetVersion.setVersionNumber(1L);
    datasetVersion.setFileMetadatas(new ArrayList<>());
    // create datafiles: datafile1.txt and datafile2.txt in "subdir", plus a
    // root-level file whose label duplicates subdir/datafile2.txt
    List<DataFile> dataFileList = new ArrayList<>();
    DataFile datafile1 = buildOctetStreamDataFile(dataset, "subdir/datafile1.txt");
    FileMetadata fmd1 = buildFileMetadata(1L, "datafile1.txt", datafile1);
    fmd1.setDirectoryLabel("subdir");
    datasetVersion.getFileMetadatas().add(fmd1);
    fmd1.setDatasetVersion(datasetVersion);
    dataFileList.add(datafile1);
    DataFile datafile2 = buildOctetStreamDataFile(dataset, "subdir/datafile2.txt");
    FileMetadata fmd2 = buildFileMetadata(2L, "datafile2.txt", datafile2);
    fmd2.setDirectoryLabel("subdir");
    datasetVersion.getFileMetadatas().add(fmd2);
    fmd2.setDatasetVersion(datasetVersion);
    dataFileList.add(datafile2);
    // datafile3 lives in the root; its label collides with fmd2's but in a
    // different directory, so it must NOT be renamed on the first pass.
    // It is deliberately not attached to the dataset version yet.
    DataFile datafile3 = buildOctetStreamDataFile(dataset, "datafile2.txt");
    FileMetadata fmd3 = buildFileMetadata(3L, "datafile2.txt", datafile3);
    dataFileList.add(datafile3);
    IngestUtil.checkForDuplicateFileNamesFinal(datasetVersion, dataFileList);
    boolean file1NameAltered = false;
    boolean file2NameAltered = false;
    boolean file3NameAltered = true;
    for (DataFile df : dataFileList) {
        if (df.getFileMetadata().getLabel().equals("datafile1-1.txt")) {
            file1NameAltered = true;
        }
        if (df.getFileMetadata().getLabel().equals("datafile2-1.txt")) {
            file2NameAltered = true;
        }
        if (df.getFileMetadata().getLabel().equals("datafile2.txt")) {
            file3NameAltered = false;
        }
    }
    // check filenames are unique (JUnit expects the expected value first)
    assertEquals(true, file1NameAltered);
    assertEquals(true, file2NameAltered);
    assertEquals(false, file3NameAltered);
    // add duplicate file in root
    datasetVersion.getFileMetadatas().add(fmd3);
    fmd3.setDatasetVersion(datasetVersion);
    // Reset the flags before the second round; without this the assertions
    // below would pass on the stale values from the first round and verify
    // nothing about the second deduplication pass.
    file1NameAltered = false;
    file2NameAltered = false;
    file3NameAltered = false;
    // try to add data files with "-1" duplicates and see if it gets incremented to "-2"
    IngestUtil.checkForDuplicateFileNamesFinal(datasetVersion, dataFileList);
    for (DataFile df : dataFileList) {
        if (df.getFileMetadata().getLabel().equals("datafile1-2.txt")) {
            file1NameAltered = true;
        }
        if (df.getFileMetadata().getLabel().equals("datafile2-2.txt")) {
            file2NameAltered = true;
        }
        if (df.getFileMetadata().getLabel().equals("datafile2-1.txt")) {
            file3NameAltered = true;
        }
    }
    // check filenames are unique
    assertEquals(true, file1NameAltered);
    assertEquals(true, file2NameAltered);
    assertEquals(true, file3NameAltered);
}

/**
 * Builds an application/octet-stream DataFile owned by the given dataset,
 * with the standard test defaults (size 200, ingest done, SHA1/"Unknown").
 */
private DataFile buildOctetStreamDataFile(Dataset dataset, String storageIdentifier) {
    DataFile datafile = new DataFile("application/octet-stream");
    datafile.setStorageIdentifier(storageIdentifier);
    datafile.setFilesize(200);
    Timestamp now = new Timestamp(new Date().getTime());
    datafile.setModificationTime(now);
    datafile.setCreateDate(now);
    datafile.setPermissionModificationTime(now);
    datafile.setOwner(dataset);
    datafile.setIngestDone();
    datafile.setChecksumType(DataFile.ChecksumType.SHA1);
    datafile.setChecksumValue("Unknown");
    return datafile;
}

/**
 * Creates a FileMetadata with the given id and label and wires it to the
 * datafile (both directions).
 */
private FileMetadata buildFileMetadata(long id, String label, DataFile datafile) {
    FileMetadata fmd = new FileMetadata();
    fmd.setId(id);
    fmd.setLabel(label);
    fmd.setDataFile(datafile);
    datafile.getFileMetadatas().add(fmd);
    return fmd;
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) Dataset(edu.harvard.iq.dataverse.Dataset) MocksFactory.makeDataset(edu.harvard.iq.dataverse.mocks.MocksFactory.makeDataset) ArrayList(java.util.ArrayList) FileMetadata(edu.harvard.iq.dataverse.FileMetadata) DatasetVersion(edu.harvard.iq.dataverse.DatasetVersion) SimpleDateFormat(java.text.SimpleDateFormat) Timestamp(java.sql.Timestamp) Date(java.util.Date) Test(org.junit.Test)

Aggregations

DataFile (edu.harvard.iq.dataverse.DataFile)111 Dataset (edu.harvard.iq.dataverse.Dataset)39 IOException (java.io.IOException)39 FileMetadata (edu.harvard.iq.dataverse.FileMetadata)30 ArrayList (java.util.ArrayList)25 DatasetVersion (edu.harvard.iq.dataverse.DatasetVersion)20 File (java.io.File)20 FileNotFoundException (java.io.FileNotFoundException)18 Path (javax.ws.rs.Path)18 Dataverse (edu.harvard.iq.dataverse.Dataverse)17 FileInputStream (java.io.FileInputStream)16 AuthenticatedUser (edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser)14 CommandException (edu.harvard.iq.dataverse.engine.command.exception.CommandException)13 Date (java.util.Date)13 GET (javax.ws.rs.GET)13 Test (org.junit.Test)13 Timestamp (java.sql.Timestamp)11 InputStream (java.io.InputStream)10 DataVariable (edu.harvard.iq.dataverse.datavariable.DataVariable)8 FileOutputStream (java.io.FileOutputStream)8