Search in sources :

Example 6 with DataFile

Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

The class DatasetUtilTest, method testGetThumbnailCandidates.

/**
 * Test of getThumbnailCandidates method, of class DatasetUtil.
 */
@Test
public void testGetThumbnailCandidates() {
    // A null dataset yields no thumbnail candidates.
    assertEquals(new ArrayList<>(), DatasetUtil.getThumbnailCandidates(null, false));
    // Build a mock dataset with a single image file attached to its create version.
    Dataset dataset = MocksFactory.makeDataset();
    DataFile dataFile = MocksFactory.makeDataFile();
    dataFile.setContentType("image/");
    dataFile.setOwner(dataset);
    dataFile.setStorageIdentifier("file://src/test/resources/images/coffeeshop.png");
    System.out.println(ImageThumbConverter.isThumbnailAvailable(dataFile));
    DatasetVersion version = dataset.getCreateVersion();
    List<FileMetadata> fmds = new ArrayList<>();
    fmds.add(MocksFactory.addFileMetadata(dataFile));
    version.setFileMetadatas(fmds);
    // No thumbnail can be generated for the mock file, so the candidate list is still empty.
    assertEquals(new ArrayList<>(), DatasetUtil.getThumbnailCandidates(dataset, false));
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) Dataset(edu.harvard.iq.dataverse.Dataset) FileMetadata(edu.harvard.iq.dataverse.FileMetadata) ArrayList(java.util.ArrayList) DatasetVersion(edu.harvard.iq.dataverse.DatasetVersion) Test(org.junit.Test)
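
For contrast, a companion test might check that a non-image file never becomes a thumbnail candidate. The sketch below is illustrative only, reuses just the helpers visible in the example above (MocksFactory, DatasetUtil), and is not part of the Dataverse test suite.

@Test
public void testGetThumbnailCandidatesIgnoresNonImageFiles() {
    // Hypothetical companion test: a dataset whose only file is tabular data
    // is not expected to yield any thumbnail candidates.
    Dataset dataset = MocksFactory.makeDataset();
    DataFile dataFile = MocksFactory.makeDataFile();
    dataFile.setContentType("text/tab-separated-values");
    dataFile.setOwner(dataset);
    DatasetVersion version = dataset.getCreateVersion();
    List<FileMetadata> fmds = new ArrayList<>();
    fmds.add(MocksFactory.addFileMetadata(dataFile));
    version.setFileMetadatas(fmds);
    assertEquals(new ArrayList<>(), DatasetUtil.getThumbnailCandidates(dataset, false));
}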

Example 7 with DataFile

Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

The class DataConverter, method runFormatConversion.

// Method for (subsettable) file format conversion.
// The method needs the subsettable file saved on disk in the
// TAB-delimited format.
// Meaning, if this is a remote subsettable file, it needs to be downloaded
// and stored locally as a temporary file; and if it's a fixed-field file, it
// needs to be converted to TAB-delimited before you can feed the file
// to this method. (See the performFormatConversion() method.)
// The method below takes the tab file and sends it to the R server
// (possibly running on a remote host) and gets back the transformed copy,
// providing error-checking and diagnostics in the process.
// This is mostly Akio Sone's code from DVN3.
// (Hence some obsolete elements in the comment above: ALL of the tabular
// data files in Dataverse are saved in tab-delimited format - we no longer
// support fixed-field files!)
private static File runFormatConversion(DataFile file, File tabFile, String formatRequested) {
    if (formatRequested.equals(FILE_TYPE_TAB)) {
        return tabFile;
    }
    File formatConvertedFile = null;
    // create the service instance
    RemoteDataFrameService dfs = new RemoteDataFrameService();
    if ("RData".equals(formatRequested)) {
        List<DataVariable> dataVariables = file.getDataTable().getDataVariables();
        Map<String, Map<String, String>> vls = getValueTableForRequestedVariables(dataVariables);
        logger.fine("format conversion: variables(getDataVariableForRequest())=" + dataVariables + "\n");
        logger.fine("format conversion: variables(dataVariables)=" + dataVariables + "\n");
        logger.fine("format conversion: value table(vls)=" + vls + "\n");
        RJobRequest sro = new RJobRequest(dataVariables, vls);
        sro.setTabularDataFileName(tabFile.getAbsolutePath());
        sro.setRequestType(SERVICE_REQUEST_CONVERT);
        sro.setFormatRequested(FILE_TYPE_RDATA);
        // execute the service
        Map<String, String> resultInfo = dfs.execute(sro);
        // resultInfo.put("offlineCitation", citation);
        logger.fine("resultInfo=" + resultInfo + "\n");
        if ("true".equals(resultInfo.get("RexecError"))) {
            logger.fine("R-runtime error trying to convert a file.");
            return null;
        } else {
            String dataFrameFileName = resultInfo.get("dataFrameFileName");
            logger.fine("data frame file name: " + dataFrameFileName);
            formatConvertedFile = new File(dataFrameFileName);
        }
    } else if ("prep".equals(formatRequested)) {
        formatConvertedFile = dfs.runDataPreprocessing(file);
    } else {
        logger.warning("Unsupported file format requested: " + formatRequested);
        return null;
    }
    if (formatConvertedFile != null && formatConvertedFile.exists()) {
        logger.fine("frmtCnvrtdFile:length=" + formatConvertedFile.length());
    } else {
        logger.warning("Format-converted file was not properly created.");
        return null;
    }
    return formatConvertedFile;
}
Also used : DataVariable(edu.harvard.iq.dataverse.datavariable.DataVariable) DataFile(edu.harvard.iq.dataverse.DataFile) File(java.io.File) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
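
As a usage note, a caller inside DataConverter could hand this method a locally staged tab-delimited file and ask for an RData frame. The fragment below is a minimal sketch, not project code; dataFile and tabFile are assumed to have been prepared by the caller (for example, by performFormatConversion() above).

// Minimal sketch (assumed variables): request an RData conversion of a
// tab-delimited file that is already saved on local disk.
File rDataFile = runFormatConversion(dataFile, tabFile, "RData");
if (rDataFile == null) {
    logger.warning("RData conversion failed for " + dataFile.getStorageIdentifier());
} else {
    logger.fine("R data frame written to " + rDataFile.getAbsolutePath() + " (" + rDataFile.length() + " bytes)");
}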

Example 8 with DataFile

Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

The class DataConverter, method performFormatConversion.

public static StorageIO<DataFile> performFormatConversion(DataFile file, StorageIO<DataFile> storageIO, String formatRequested, String formatType) {
    if (!file.isTabularData()) {
        return null;
    }
    // If the file is already in the requested tab-delimited format,
    // we don't need to do anything:
    if (formatRequested.equals(FILE_TYPE_TAB) && file.getContentType().equals("text/tab-separated-values")) {
        return storageIO;
    }
    InputStream convertedFileStream = null;
    long convertedFileSize = 0;
    // Check whether a cached copy already exists, as an auxiliary object in the requested
    // format:
    try {
        convertedFileStream = Channels.newInputStream((ReadableByteChannel) storageIO.openAuxChannel(formatRequested));
        convertedFileSize = storageIO.getAuxObjectSize(formatRequested);
    } catch (IOException ioex) {
        logger.fine("No cached copy for file format " + formatRequested + ", file " + file.getStorageIdentifier());
        convertedFileStream = null;
    }
    // If not cached, run the conversion:
    if (convertedFileStream == null) {
        File tabFile = null;
        boolean tempFilesRequired = false;
        try {
            Path tabFilePath = storageIO.getFileSystemPath();
            tabFile = tabFilePath.toFile();
        } catch (UnsupportedDataAccessOperationException uoex) {
            // this means there is no direct filesystem path for this object; it's ok!
            logger.fine("Could not open source file as a local Path - will go the temp file route.");
            tempFilesRequired = true;
        } catch (IOException ioex) {
            // this is likely a fatal condition, as in, the file is unaccessible:
            return null;
        }
        if (tempFilesRequired) {
            ReadableByteChannel tabFileChannel = null;
            try {
                logger.fine("opening datafFileIO for the source tabular file...");
                storageIO.open();
                tabFileChannel = storageIO.getReadChannel();
                FileChannel tempFileChannel;
                tabFile = File.createTempFile("tempTabFile", ".tmp");
                tempFileChannel = new FileOutputStream(tabFile).getChannel();
                tempFileChannel.transferFrom(tabFileChannel, 0, storageIO.getSize());
            } catch (IOException ioex) {
                logger.warning("caught IOException trying to store tabular file " + storageIO.getDataFile().getStorageIdentifier() + " as a temp file.");
                return null;
            }
        }
        if (tabFile == null) {
            return null;
        }
        if (tabFile.length() > 0) {
            File formatConvertedFile = runFormatConversion(file, tabFile, formatRequested);
            // cache the result for future use:
            if (formatConvertedFile != null && formatConvertedFile.exists()) {
                try {
                    storageIO.savePathAsAux(Paths.get(formatConvertedFile.getAbsolutePath()), formatRequested);
                } catch (IOException ex) {
                    logger.warning("failed to save cached format " + formatRequested + " for " + file.getStorageIdentifier());
                // We'll assume that this is a non-fatal condition.
                }
                // re-open the generated file:
                try {
                    convertedFileStream = new FileInputStream(formatConvertedFile);
                    convertedFileSize = formatConvertedFile.length();
                } catch (FileNotFoundException ioex) {
                    logger.warning("Failed to open generated format " + formatRequested + " for " + file.getStorageIdentifier());
                    return null;
                }
            }
        }
    }
    // Wrap the converted stream so it can be handed back to the download API instance writer:
    if (convertedFileStream != null && convertedFileSize > 0) {
        InputStreamIO inputStreamIO = null;
        try {
            inputStreamIO = new InputStreamIO(convertedFileStream, convertedFileSize);
        } catch (IOException ioex) {
            return null;
        }
        inputStreamIO.setMimeType(formatType);
        String fileName = storageIO.getFileName();
        if (fileName == null || fileName.isEmpty()) {
            fileName = "f" + file.getId().toString();
        }
        inputStreamIO.setFileName(generateAltFileName(formatRequested, fileName));
        return inputStreamIO;
    }
    return null;
}
Also used : Path(java.nio.file.Path) ReadableByteChannel(java.nio.channels.ReadableByteChannel) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileChannel(java.nio.channels.FileChannel) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) FileOutputStream(java.io.FileOutputStream) DataFile(edu.harvard.iq.dataverse.DataFile) File(java.io.File)
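
From the download path, this method might be driven roughly as sketched below. The variable names and the MIME string are assumptions for illustration and are not taken from the Dataverse download API.

// Minimal sketch (assumed names): request an alternative rendition of a tabular
// file and fall back to streaming the original if no conversion is available.
StorageIO<DataFile> converted = DataConverter.performFormatConversion(
        dataFile, storageIO, "RData", "application/x-rlang-transport");
StorageIO<DataFile> toStream = (converted != null) ? converted : storageIO;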

Example 9 with DataFile

Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

The class FileAccessIO, method getDatasetDirectory.

private String getDatasetDirectory() throws IOException {
    if (dvObject == null) {
        throw new IOException("No DvObject defined in the Data Access Object");
    }
    Path datasetDirectoryPath = null;
    if (dvObject instanceof Dataset) {
        datasetDirectoryPath = this.getDataset().getFileSystemDirectory();
    } else if (dvObject instanceof DataFile) {
        datasetDirectoryPath = this.getDataFile().getOwner().getFileSystemDirectory();
    } else if (dvObject instanceof Dataverse) {
        throw new IOException("FileAccessIO: Dataverses are not a supported dvObject");
    }
    if (datasetDirectoryPath == null) {
        throw new IOException("Could not determine the filesystem directory of the parent dataset.");
    }
    String datasetDirectory = datasetDirectoryPath.toString();
    if (dvObject.getStorageIdentifier() == null || dvObject.getStorageIdentifier().isEmpty()) {
        throw new IOException("Data Access: No local storage identifier defined for this datafile.");
    }
    return datasetDirectory;
}
Also used : Path(java.nio.file.Path) DataFile(edu.harvard.iq.dataverse.DataFile) Dataset(edu.harvard.iq.dataverse.Dataset) IOException(java.io.IOException) Dataverse(edu.harvard.iq.dataverse.Dataverse)
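
For orientation, the directory returned here is typically joined with the object's storage identifier to obtain the physical location of a local file. The snippet below is a simplified illustration of that relationship, with assumed usage, not additional FileAccessIO code.

// Simplified illustration (assumption): a locally stored DataFile lives under its
// owning dataset's filesystem directory, named by its storage identifier.
String datasetDirectory = getDatasetDirectory();
Path physicalPath = Paths.get(datasetDirectory, dvObject.getStorageIdentifier());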

Example 10 with DataFile

Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

The class S3AccessIO, method open.

@Override
public void open(DataAccessOption... options) throws IOException {
    if (s3 == null) {
        throw new IOException("ERROR: s3 not initialised. ");
    }
    if (bucketName == null || !s3.doesBucketExist(bucketName)) {
        throw new IOException("ERROR: S3AccessIO - You must create and configure a bucket before creating datasets.");
    }
    DataAccessRequest req = this.getRequest();
    if (isWriteAccessRequested(options)) {
        isWriteAccess = true;
        isReadAccess = false;
    } else {
        isWriteAccess = false;
        isReadAccess = true;
    }
    if (dvObject instanceof DataFile) {
        String storageIdentifier = dvObject.getStorageIdentifier();
        DataFile dataFile = this.getDataFile();
        if (req != null && req.getParameter("noVarHeader") != null) {
            this.setNoVarHeader(true);
        }
        if (storageIdentifier == null || "".equals(storageIdentifier)) {
            throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile.");
        }
        if (isReadAccess) {
            key = getMainFileKey();
            S3Object s3object = s3.getObject(new GetObjectRequest(bucketName, key));
            InputStream in = s3object.getObjectContent();
            if (in == null) {
                throw new IOException("Cannot get Object" + key);
            }
            this.setInputStream(in);
            setChannel(Channels.newChannel(in));
            this.setSize(s3object.getObjectMetadata().getContentLength());
            if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) {
                List<DataVariable> datavariables = dataFile.getDataTable().getDataVariables();
                String varHeaderLine = generateVariableHeader(datavariables);
                this.setVarHeader(varHeaderLine);
            }
        } else if (isWriteAccess) {
            key = dataFile.getOwner().getAuthority() + "/" + this.getDataFile().getOwner().getIdentifier();
            if (storageIdentifier.startsWith(S3_IDENTIFIER_PREFIX + "://")) {
                key += "/" + storageIdentifier.substring(storageIdentifier.lastIndexOf(":") + 1);
            } else {
                key += "/" + storageIdentifier;
                dvObject.setStorageIdentifier(S3_IDENTIFIER_PREFIX + "://" + bucketName + ":" + storageIdentifier);
            }
        }
        this.setMimeType(dataFile.getContentType());
        try {
            this.setFileName(dataFile.getFileMetadata().getLabel());
        } catch (Exception ex) {
            this.setFileName("unknown");
        }
    } else if (dvObject instanceof Dataset) {
        Dataset dataset = this.getDataset();
        key = dataset.getAuthority() + "/" + dataset.getIdentifier();
        dataset.setStorageIdentifier(S3_IDENTIFIER_PREFIX + "://" + key);
    } else if (dvObject instanceof Dataverse) {
        throw new IOException("Data Access: Invalid DvObject type : Dataverse");
    } else {
        throw new IOException("Data Access: Invalid DvObject type");
    }
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) Dataset(edu.harvard.iq.dataverse.Dataset) FileNotFoundException(java.io.FileNotFoundException) DataVariable(edu.harvard.iq.dataverse.datavariable.DataVariable) IOException(java.io.IOException) Dataverse(edu.harvard.iq.dataverse.Dataverse) MultiObjectDeleteException(com.amazonaws.services.s3.model.MultiObjectDeleteException) AmazonClientException(com.amazonaws.AmazonClientException) SdkClientException(com.amazonaws.SdkClientException) DataFile(edu.harvard.iq.dataverse.DataFile) S3Object(com.amazonaws.services.s3.model.S3Object) GetObjectRequest(com.amazonaws.services.s3.model.GetObjectRequest)
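
To make the key handling above easier to follow, the fragment below restates how an object key is assembled for a plain storage identifier (one without the s3:// prefix) and then used to fetch the object. It is a simplified sketch with assumed local variables, not additional S3AccessIO code.

// Simplified sketch (assumed variables): object keys follow the layout
//   <dataset authority>/<dataset identifier>/<file storage identifier>
String datasetKey = dataFile.getOwner().getAuthority() + "/" + dataFile.getOwner().getIdentifier();
String objectKey = datasetKey + "/" + storageIdentifier;
S3Object s3object = s3.getObject(new GetObjectRequest(bucketName, objectKey));
InputStream in = s3object.getObjectContent();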

Aggregations

DataFile (edu.harvard.iq.dataverse.DataFile) 111
Dataset (edu.harvard.iq.dataverse.Dataset) 39
IOException (java.io.IOException) 39
FileMetadata (edu.harvard.iq.dataverse.FileMetadata) 30
ArrayList (java.util.ArrayList) 25
DatasetVersion (edu.harvard.iq.dataverse.DatasetVersion) 20
File (java.io.File) 20
FileNotFoundException (java.io.FileNotFoundException) 18
Path (javax.ws.rs.Path) 18
Dataverse (edu.harvard.iq.dataverse.Dataverse) 17
FileInputStream (java.io.FileInputStream) 16
AuthenticatedUser (edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser) 14
CommandException (edu.harvard.iq.dataverse.engine.command.exception.CommandException) 13
Date (java.util.Date) 13
GET (javax.ws.rs.GET) 13
Test (org.junit.Test) 13
Timestamp (java.sql.Timestamp) 11
InputStream (java.io.InputStream) 10
DataVariable (edu.harvard.iq.dataverse.datavariable.DataVariable) 8
FileOutputStream (java.io.FileOutputStream) 8