Example 46 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class IngestServiceBean method startIngestJobs.

// TODO: consider creating a version of this method that would take
// datasetversion as the argument.
// -- L.A. 4.6
// @Asynchronous - just an experiment...
public void startIngestJobs(Dataset dataset, AuthenticatedUser user) {
    int count = 0;
    List<DataFile> scheduledFiles = new ArrayList<>();
    IngestMessage ingestMessage = null;
    for (DataFile dataFile : dataset.getFiles()) {
        if (dataFile.isIngestScheduled()) {
            // TODO: investigate why saving the file object obtained from the loop
            // drops the role assignment added at create time
            // (re-finding the file by id resolves that):
            dataFile = fileService.find(dataFile.getId());
            long ingestSizeLimit = -1;
            try {
                ingestSizeLimit = systemConfig.getTabularIngestSizeLimit(getTabDataReaderByMimeType(dataFile.getContentType()).getFormatName());
            } catch (IOException ioex) {
                logger.warning("IO Exception trying to retrieve the ingestable format identifier from the plugin for type " + dataFile.getContentType() + " (non-fatal);");
            }
            if (ingestSizeLimit == -1 || dataFile.getFilesize() < ingestSizeLimit) {
                dataFile.SetIngestInProgress();
                dataFile = fileService.save(dataFile);
                scheduledFiles.add(dataFile);
                logger.fine("Attempting to queue the file " + dataFile.getFileMetadata().getLabel() + " for ingest, for dataset: " + dataset.getGlobalId());
                count++;
            } else {
                dataFile.setIngestDone();
                dataFile = fileService.save(dataFile);
                logger.info("Skipping tabular ingest of the file " + dataFile.getFileMetadata().getLabel() + ", because of the size limit (set to " + ingestSizeLimit + " bytes).");
            }
        }
    }
    if (count > 0) {
        String info = "Ingest of " + count + " tabular data file(s) is in progress.";
        logger.info(info);
        datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.Ingest, (user != null) ? user.getId() : null, info);
        DataFile[] scheduledFilesArray = scheduledFiles.toArray(new DataFile[count]);
        scheduledFiles = null;
        // Sort ingest jobs by file size, smallest first:
        Arrays.sort(scheduledFilesArray, Comparator.comparingLong(DataFile::getFilesize));
        ingestMessage = new IngestMessage(IngestMessage.INGEST_MESAGE_LEVEL_INFO);
        for (int i = 0; i < count; i++) {
            ingestMessage.addFileId(scheduledFilesArray[i].getId());
            logger.fine("Sorted order: " + i + " (size=" + scheduledFilesArray[i].getFilesize() + ")");
        }
        QueueConnection conn = null;
        QueueSession session = null;
        QueueSender sender = null;
        try {
            conn = factory.createQueueConnection();
            session = conn.createQueueSession(false, 0);
            sender = session.createSender(queue);
            Message message = session.createObjectMessage(ingestMessage);
            sender.send(message);
        } catch (JMSException ex) {
            logger.warning("Failed to send the ingest message to the JMS queue: " + ex.getMessage());
        } finally {
            try {
                if (sender != null) {
                    sender.close();
                }
                if (session != null) {
                    session.close();
                }
                if (conn != null) {
                    conn.close();
                }
            } catch (JMSException ex) {
                logger.warning("Failed to close JMS resources: " + ex.getMessage());
            }
        }
    }
}
Also used : FacesMessage(javax.faces.application.FacesMessage) Message(javax.jms.Message) ArrayList(java.util.ArrayList) JMSException(javax.jms.JMSException) IOException(java.io.IOException) DataFile(edu.harvard.iq.dataverse.DataFile) QueueConnection(javax.jms.QueueConnection) QueueSender(javax.jms.QueueSender) QueueSession(javax.jms.QueueSession)
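
A side note on the close boilerplate above: under JMS 2.0, QueueConnection, QueueSession, and QueueSender all extend AutoCloseable, so the same send can be written with try-with-resources. The following is a minimal sketch, not the project's code; it assumes a JMS 2.0 runtime, the same injected factory, queue, and logger fields, an import of javax.jms.Session for the acknowledge-mode constant, and Session.AUTO_ACKNOWLEDGE in place of the raw 0 acknowledge mode.

// Hypothetical helper, assuming a JMS 2.0 runtime; the sender, session, and
// connection are closed automatically, in reverse order, even if send() throws.
private void sendIngestMessage(IngestMessage ingestMessage) {
    try (QueueConnection conn = factory.createQueueConnection();
         QueueSession session = conn.createQueueSession(false, Session.AUTO_ACKNOWLEDGE);
         QueueSender sender = session.createSender(queue)) {
        sender.send(session.createObjectMessage(ingestMessage));
    } catch (JMSException ex) {
        logger.warning("Failed to queue ingest message: " + ex.getMessage());
    }
}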

Example 47 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class IngestServiceBean method fixMissingOriginalType.

// This method fixes a datatable object that's missing the format type of
// the ingested original. It will check the saved original file to
// determine the type.
private void fixMissingOriginalType(long fileId) {
    DataFile dataFile = fileService.find(fileId);
    if (dataFile != null && dataFile.isTabularData()) {
        String originalFormat = dataFile.getDataTable().getOriginalFileFormat();
        Long datatableId = dataFile.getDataTable().getId();
        if (StringUtil.isEmpty(originalFormat) || originalFormat.equals(FileUtil.MIME_TYPE_TAB)) {
            // We need to determine the mime type of the saved original
            // and save it in the database.
            // 
            // First, we need access to the file. Note that the code below
            // works with any supported StorageIO driver (although, as of now
            // all the production installations out there are only using filesystem
            // access; but just in case)
            // The FileUtil method that determines the type takes java.io.File
            // as an argument. So for StorageIO drivers that provide local
            // file access, we'll just go directly to the stored file. For
            // swift and similar implementations, we'll read the saved aux
            // channel and save it as a local temp file.
            StorageIO<DataFile> storageIO;
            File savedOriginalFile = null;
            boolean tempFileRequired = false;
            try {
                storageIO = dataFile.getStorageIO();
                storageIO.open();
                if (storageIO.isLocalFile()) {
                    try {
                        savedOriginalFile = storageIO.getAuxObjectAsPath(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION).toFile();
                    } catch (IOException ioex) {
                        // do nothing, just make sure savedOriginalFile is still null:
                        savedOriginalFile = null;
                    }
                }
                if (savedOriginalFile == null) {
                    tempFileRequired = true;
                    savedOriginalFile = File.createTempFile("tempSavedOriginal", ".tmp");
                    // Transfer the saved original into the temp file; try-with-resources
                    // closes both channels (closing the FileChannel also closes the
                    // underlying FileOutputStream):
                    try (ReadableByteChannel savedOriginalChannel = (ReadableByteChannel) storageIO.openAuxChannel(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
                         FileChannel tempSavedOriginalChannel = new FileOutputStream(savedOriginalFile).getChannel()) {
                        tempSavedOriginalChannel.transferFrom(savedOriginalChannel, 0, storageIO.getAuxObjectSize(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION));
                    }
                }
            } catch (Exception ex) {
                logger.warning("Exception " + ex.getClass() + " caught trying to open StorageIO channel for the saved original; (datafile id=" + fileId + ", datatable id=" + datatableId + "): " + ex.getMessage());
                savedOriginalFile = null;
            }
            if (savedOriginalFile == null) {
                logger.warning("Could not obtain the saved original file as a java.io.File! (datafile id=" + fileId + ", datatable id=" + datatableId + ")");
                return;
            }
            String fileTypeDetermined = null;
            try {
                fileTypeDetermined = FileUtil.determineFileType(savedOriginalFile, "");
            } catch (IOException ioex) {
                logger.warning("Caught exception trying to determine original file type (datafile id=" + fileId + ", datatable id=" + datatableId + "): " + ioex.getMessage());
            }
            // If we had to create a temp file, delete it now:
            if (tempFileRequired) {
                savedOriginalFile.delete();
            }
            if (fileTypeDetermined == null) {
                logger.warning("Failed to determine preserved original file type. (datafile id=" + fileId + ", datatable id=" + datatableId + ")");
                return;
            }
            // If the type came back as generic "text/plain", the saved original
            // of a tabular file really means it must be a CSV file:
            if (fileTypeDetermined.startsWith("text/plain")) {
                fileTypeDetermined = FileUtil.MIME_TYPE_CSV;
            }
            // and, finally, if it is still "application/octet-stream", it must be Excel:
            if (FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT.equals(fileTypeDetermined)) {
                fileTypeDetermined = FileUtil.MIME_TYPE_XLSX;
            }
            logger.info("Original file type determined: " + fileTypeDetermined + " (file id=" + fileId + ", datatable id=" + datatableId + "; file path: " + savedOriginalFile.getAbsolutePath() + ")");
            // save permanently in the database:
            dataFile.getDataTable().setOriginalFileFormat(fileTypeDetermined);
            fileService.saveDataTable(dataFile.getDataTable());
        } else {
            logger.info("DataFile id=" + fileId + "; original type already present: " + originalFormat);
        }
    } else {
        logger.warning("DataFile id=" + fileId + ": No such DataFile!");
    }
}
Also used : DataFile (edu.harvard.iq.dataverse.DataFile) ReadableByteChannel (java.nio.channels.ReadableByteChannel) FileChannel (java.nio.channels.FileChannel) FileOutputStream (java.io.FileOutputStream) IOException (java.io.IOException) File (java.io.File) FileExceedsMaxSizeException (edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException) UnsupportedDataAccessOperationException (edu.harvard.iq.dataverse.dataaccess.UnsupportedDataAccessOperationException) ParseException (java.text.ParseException) JMSException (javax.jms.JMSException) FileNotFoundException (java.io.FileNotFoundException) EJBException (javax.ejb.EJBException)
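
The channel-to-channel transfer above can also be written with java.nio.file.Files.copy, which handles buffering and closes the destination for us. A minimal sketch under the same assumptions (the storageIO variable and FileUtil constant from the method above; additional imports: java.io.InputStream, java.nio.channels.Channels, java.nio.file.Files, java.nio.file.Path, java.nio.file.StandardCopyOption):

// Copy the saved-original aux object to a local temp file:
Path tempPath = Files.createTempFile("tempSavedOriginal", ".tmp");
try (InputStream in = Channels.newInputStream(
        (ReadableByteChannel) storageIO.openAuxChannel(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION))) {
    // REPLACE_EXISTING, because createTempFile has already created an empty file:
    Files.copy(in, tempPath, StandardCopyOption.REPLACE_EXISTING);
}
File savedOriginalFile = tempPath.toFile();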

Example 48 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class IngestServiceBean method extractMetadata.

/*
 * extractMetadata:
 * Framework for extracting metadata from uploaded files. The results will
 * be used to populate the metadata of the Dataset to which the file belongs.
 */
public boolean extractMetadata(String tempFileLocation, DataFile dataFile, DatasetVersion editVersion) throws IOException {
    boolean ingestSuccessful = false;
    // Locate metadata extraction plugin for the file format by looking
    // it up with the Ingest Service Provider Registry:
    // FileMetadataExtractor extractorPlugin = IngestSP.getMetadataExtractorByMIMEType(dfile.getContentType());
    FileMetadataExtractor extractorPlugin = new FITSFileMetadataExtractor();
    FileMetadataIngest extractedMetadata;
    // try-with-resources ensures the temp file stream is closed once the
    // extractor has consumed it:
    try (FileInputStream tempFileInputStream = new FileInputStream(new File(tempFileLocation))) {
        extractedMetadata = extractorPlugin.ingest(new BufferedInputStream(tempFileInputStream));
    } catch (FileNotFoundException notfoundEx) {
        throw new IOException("Could not open temp file " + tempFileLocation);
    }
    Map<String, Set<String>> extractedMetadataMap = extractedMetadata.getMetadataMap();
    // Store the fields and values we've gathered for safe-keeping, and
    // attempt to ingest the extracted metadata into the database
    // (logic carried over from 3.6);
    // TODO: this should throw an exception if anything goes wrong.
    FileMetadata fileMetadata = dataFile.getFileMetadata();
    if (extractedMetadataMap != null) {
        logger.fine("Ingest Service: Processing extracted metadata;");
        if (extractedMetadata.getMetadataBlockName() != null) {
            logger.fine("Ingest Service: This metadata belongs to the " + extractedMetadata.getMetadataBlockName() + " metadata block.");
            processDatasetMetadata(extractedMetadata, editVersion);
        }
        processFileLevelMetadata(extractedMetadata, fileMetadata);
    }
    ingestSuccessful = true;
    return ingestSuccessful;
}
Also used : Set(java.util.Set) LinkedHashSet(java.util.LinkedHashSet) FileMetadataIngest(edu.harvard.iq.dataverse.ingest.metadataextraction.FileMetadataIngest) FileNotFoundException(java.io.FileNotFoundException) FileMetadata(edu.harvard.iq.dataverse.FileMetadata) FileMetadataExtractor(edu.harvard.iq.dataverse.ingest.metadataextraction.FileMetadataExtractor) FITSFileMetadataExtractor(edu.harvard.iq.dataverse.ingest.metadataextraction.impl.plugins.fits.FITSFileMetadataExtractor) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) BufferedInputStream(java.io.BufferedInputStream) DataFile(edu.harvard.iq.dataverse.DataFile) File(java.io.File)
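
The registry lookup mentioned in the comment above is not wired up in this snippet; the FITS extractor is hardcoded. A hypothetical sketch of what a MIME-type-keyed lookup could look like; the EXTRACTORS map and getMetadataExtractorByMimeType are illustrative names, not Dataverse API (imports: java.util.HashMap, java.util.Map):

// Illustrative only: a simple MIME-type-to-plugin registry.
private static final Map<String, FileMetadataExtractor> EXTRACTORS = new HashMap<>();
static {
    // "application/fits" is the MIME type Dataverse uses for FITS files:
    EXTRACTORS.put("application/fits", new FITSFileMetadataExtractor());
}

private FileMetadataExtractor getMetadataExtractorByMimeType(String mimeType) {
    // Returns null when no extractor is registered for this type:
    return EXTRACTORS.get(mimeType);
}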

Example 49 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class IngestServiceBean method produceCharacterSummaryStatistics.

public void produceCharacterSummaryStatistics(DataFile dataFile, File generatedTabularFile) throws IOException {
    for (int i = 0; i < dataFile.getDataTable().getVarQuantity(); i++) {
        if (dataFile.getDataTable().getDataVariables().get(i).isTypeCharacter()) {
            StorageIO<DataFile> storageIO = dataFile.getStorageIO();
            storageIO.open();
            logger.fine("subsetting character vector");
            String[] variableVector = TabularSubsetGenerator.subsetStringVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue());
            // calculateCharacterSummaryStatistics(dataFile, i, variableVector);
            // calculate the UNF while we are at it:
            logger.fine("Calculating UNF on a String vector");
            calculateUNF(dataFile, i, variableVector);
            logger.fine("Done! (character)");
            variableVector = null;
        }
    }
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) FileInputStream(java.io.FileInputStream)
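
The calculateCharacterSummaryStatistics call is commented out above, so only the UNF is computed for character vectors. For illustration, here is a hypothetical sketch of the kind of per-variable summary one might compute over such a vector; this is not the Dataverse implementation (imports: java.util.HashSet, java.util.Set):

// Hypothetical: count missing (null) and distinct values in a String vector.
private long[] characterSummaryStatistics(String[] variableVector) {
    long missing = 0;
    Set<String> distinct = new HashSet<>();
    for (String value : variableVector) {
        if (value == null) {
            missing++;
        } else {
            distinct.add(value);
        }
    }
    return new long[] { missing, distinct.size() };
}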

Example 50 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class HarvesterServiceBean method deleteHarvestedDataset.

private void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Logger hdLogger) {
    // Purge all the SOLR documents associated with this dataset from the
    // index server:
    indexService.deleteHarvestedDocuments(dataset);
    try {
        // Harvested files are purged directly via the entity manager,
        // rather than by running the regular DeleteFileCommand on them:
        for (DataFile harvestedFile : dataset.getFiles()) {
            DataFile merged = em.merge(harvestedFile);
            em.remove(merged);
        }
        dataset.setFiles(null);
        Dataset merged = em.merge(dataset);
        engineService.submit(new DeleteDatasetCommand(request, merged));
        hdLogger.fine("Successfully deleted harvested dataset " + dataset.getGlobalId());
    } catch (IllegalCommandException ex) {
        hdLogger.warning("Failed to delete harvested dataset " + dataset.getGlobalId() + " (illegal command): " + ex.getMessage());
    } catch (PermissionException ex) {
        hdLogger.warning("Failed to delete harvested dataset " + dataset.getGlobalId() + " (permission denied): " + ex.getMessage());
    } catch (CommandException ex) {
        hdLogger.warning("Failed to delete harvested dataset " + dataset.getGlobalId() + ": " + ex.getMessage());
    }
}
Also used : DataFile (edu.harvard.iq.dataverse.DataFile) PermissionException (edu.harvard.iq.dataverse.engine.command.exception.PermissionException) Dataset (edu.harvard.iq.dataverse.Dataset) DeleteDatasetCommand (edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetCommand) IllegalCommandException (edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException) CommandException (edu.harvard.iq.dataverse.engine.command.exception.CommandException)
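
Since IllegalCommandException and PermissionException both extend CommandException in the Dataverse command engine, the three catch blocks in deleteHarvestedDataset could be collapsed into one when per-failure messages are not needed; a sketch of that variant:

} catch (CommandException ex) {
    // Covers IllegalCommandException and PermissionException as well:
    hdLogger.warning("Failed to delete harvested dataset " + dataset.getGlobalId() + ": " + ex.getMessage());
}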

Aggregations

DataFile (edu.harvard.iq.dataverse.DataFile): 111
Dataset (edu.harvard.iq.dataverse.Dataset): 39
IOException (java.io.IOException): 39
FileMetadata (edu.harvard.iq.dataverse.FileMetadata): 30
ArrayList (java.util.ArrayList): 25
DatasetVersion (edu.harvard.iq.dataverse.DatasetVersion): 20
File (java.io.File): 20
FileNotFoundException (java.io.FileNotFoundException): 18
Path (javax.ws.rs.Path): 18
Dataverse (edu.harvard.iq.dataverse.Dataverse): 17
FileInputStream (java.io.FileInputStream): 16
AuthenticatedUser (edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser): 14
CommandException (edu.harvard.iq.dataverse.engine.command.exception.CommandException): 13
Date (java.util.Date): 13
GET (javax.ws.rs.GET): 13
Test (org.junit.Test): 13
Timestamp (java.sql.Timestamp): 11
InputStream (java.io.InputStream): 10
DataVariable (edu.harvard.iq.dataverse.datavariable.DataVariable): 8
FileOutputStream (java.io.FileOutputStream): 8