Search in sources :

Example 1 with FileExceedsMaxSizeException

use of edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException in project dataverse by IQSS.

the class FileUtil method createDataFiles.

/**
 * Saves an uploaded stream into the temporary files directory and creates one
 * or more {@code DataFile} objects from it.
 *
 * <p>The content type is first refined with {@code determineFileType}; then,
 * depending on the resulting type:
 * <ul>
 *   <li>{@code application/fits-gzipped}: the file is uncompressed and a single
 *       DataFile is created from the uncompressed content;</li>
 *   <li>{@code application/zip}: every usable archive entry becomes its own
 *       DataFile;</li>
 *   <li>zipped shapefile: the archive is handed to
 *       {@code IngestServiceShapefileHelper} and one DataFile is created per
 *       re-zipped file it produces;</li>
 *   <li>anything else, or when uncompressing/unpacking fails: a single
 *       DataFile is created from the uploaded file as is.</li>
 * </ul>
 *
 * @param version             dataset version passed through to {@code createSingleDataFile}
 * @param inputStream         content of the uploaded file
 * @param fileName            name of the uploaded file
 * @param suppliedContentType mime type supplied by the caller; may be null or empty
 * @param systemConfig        source of the upload size limit, the zip file-count
 *                            limit and the fixity checksum algorithm
 * @return the list of created DataFiles; {@code null} when shapefile
 *         processing fails or yields no files, or when the final single
 *         DataFile could not be created
 * @throws IOException if the temp directory is not configured, the upload
 *                     cannot be saved, the saved file exceeds the size limit,
 *                     or an I/O error occurs on a path that does not fall back
 *                     to saving the file as is
 */
public static List<DataFile> createDataFiles(DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, SystemConfig systemConfig) throws IOException {
    List<DataFile> datafiles = new ArrayList<>();
    String warningMessage = null;
    // save the file, in the temporary location for now:
    Path tempFile = null;
    Long fileSizeLimit = systemConfig.getMaxFileUploadSize();
    if (getFilesTempDirectory() != null) {
        tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload");
        // "temporary" location is the key here; this is why we are not using
        // the DataStore framework for this - the assumption is that
        // temp files will always be stored on the local filesystem.
        // -- L.A. Jul. 2014
        logger.fine("Will attempt to save the file as: " + tempFile.toString());
        Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING);
        // A file size check, before we do anything else:
        // (note that "no size limit set" = "unlimited")
        // (also note, that if this is a zip file, we'll be checking
        // the size limit for each of the individual unpacked files)
        Long fileSize = tempFile.toFile().length();
        if (fileSizeLimit != null && fileSize > fileSizeLimit) {
            try {
                tempFile.toFile().delete();
            } catch (Exception ignored) {
                // Best-effort cleanup only; the size-limit error below is
                // what the caller needs to see.
            }
            throw new IOException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), fileSize.toString(), fileSizeLimit.toString()));
        }
    } else {
        throw new IOException("Temp directory is not configured.");
    }
    logger.fine("mime type supplied: " + suppliedContentType);
    // Let's try our own utilities (Jhove, etc.) to determine the file type
    // of the uploaded file. (We may already have a mime type supplied for this
    // file - maybe the type that the browser recognized on upload; or, if
    // it's a harvest, maybe the remote server has already given us the type
    // for this file... with our own type utility we may or may not do better
    // than the type supplied:
    // -- L.A.
    String recognizedType = null;
    String finalType = null;
    try {
        recognizedType = determineFileType(tempFile.toFile(), fileName);
        logger.fine("File utility recognized the file as " + recognizedType);
        if (recognizedType != null && !recognizedType.equals("")) {
            // The recognized type replaces the supplied one when the supplied
            // type is missing/undetermined, when the supplied type is a
            // tabular type other than CSV/XLSX, or when the recognized type
            // is one that gets special handling below.
            if (suppliedContentType == null
                    || suppliedContentType.equals("")
                    || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_DEFAULT)
                    || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_BINARY)
                    || (ingestableAsTabular(suppliedContentType)
                            && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV)
                            && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV_ALT)
                            && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_XLSX))
                    || ingestableAsTabular(recognizedType)
                    || recognizedType.equals("application/fits-gzipped")
                    || recognizedType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)
                    || recognizedType.equals(MIME_TYPE_ZIP)) {
                finalType = recognizedType;
            }
        }
    } catch (Exception ex) {
        // Type detection is advisory; fall back to the supplied type below.
        logger.warning("Failed to run the file utility mime type check on file " + fileName);
    }
    if (finalType == null) {
        finalType = (suppliedContentType == null || suppliedContentType.equals("")) ? MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType;
    }
    if (finalType.equals("application/fits-gzipped")) {
        String finalFileName = fileName;
        // strip the ".gz" extension, since we are going to uncompress it:
        if (fileName != null && fileName.matches(".*\\.gz$")) {
            finalFileName = fileName.replaceAll("\\.gz$", "");
        }
        DataFile datafile = null;
        // The raw file stream is tracked separately so that it can still be
        // closed if the GZIPInputStream constructor itself fails.
        InputStream compressedIn = null;
        InputStream uncompressedIn = null;
        try {
            compressedIn = new FileInputStream(tempFile.toFile());
            uncompressedIn = new GZIPInputStream(compressedIn);
            File unZippedTempFile = saveInputStreamInTempFile(uncompressedIn, fileSizeLimit);
            datafile = createSingleDataFile(version, unZippedTempFile, finalFileName, MIME_TYPE_UNDETERMINED_DEFAULT, systemConfig.getFileFixityChecksumAlgorithm());
        } catch (IOException | FileExceedsMaxSizeException ioex) {
            logger.warning("Failed to uncompress gzipped file " + fileName + "; saving the file as is. " + ioex.getMessage());
            datafile = null;
        } finally {
            // Closing the gzip stream also closes the file stream it wraps.
            InputStream toClose = (uncompressedIn != null) ? uncompressedIn : compressedIn;
            if (toClose != null) {
                try {
                    toClose.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done about a failed close here.
                }
            }
        }
        // If an uncompressed DataFile was produced, return it; otherwise fall
        // through, and a DataFile will be created further down from the
        // original (still compressed) upload.
        if (datafile != null) {
            // remove the compressed temp file:
            try {
                if (!tempFile.toFile().delete()) {
                    // (this is very non-fatal)
                    logger.warning("Failed to delete temporary file " + tempFile.toString());
                }
            } catch (SecurityException ex) {
                // (this is very non-fatal)
                logger.warning("Failed to delete temporary file " + tempFile.toString());
            }
            datafiles.add(datafile);
            return datafiles;
        }
    } else if (finalType.equals("application/zip")) {
        // If it's a ZIP file, we are going to unpack it and create multiple
        // DataFile objects from its contents:
        ZipInputStream unZippedIn = null;
        ZipEntry zipEntry = null;
        int fileNumberLimit = systemConfig.getZipUploadFilesLimit();
        try {
            unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile()));
            while (true) {
                try {
                    zipEntry = unZippedIn.getNextEntry();
                } catch (IllegalArgumentException iaex) {
                    // Note:
                    // ZipInputStream documentation doesn't even mention that
                    // getNextEntry() throws an IllegalArgumentException!
                    // but that's what happens if the file name of the next
                    // entry is not valid in the current CharSet.
                    // -- L.A.
                    warningMessage = "Failed to unpack Zip file. (Unknown Character Set used in a file name?) Saving the file as is.";
                    logger.warning(warningMessage);
                    throw new IOException();
                }
                if (zipEntry == null) {
                    break;
                }
                if (!zipEntry.isDirectory()) {
                    // NOTE(review): this check runs before the next file is
                    // added, so with '>' up to fileNumberLimit + 1 files can
                    // be accepted - confirm whether '>=' was intended.
                    if (datafiles.size() > fileNumberLimit) {
                        logger.warning("Zip upload - too many files.");
                        warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit + "); please upload a zip archive with fewer files, if you want them to be ingested " + "as individual DataFiles.";
                        throw new IOException();
                    }
                    String fileEntryName = zipEntry.getName();
                    logger.fine("ZipEntry, file: " + fileEntryName);
                    if (fileEntryName != null && !fileEntryName.equals("")) {
                        // Drop everything up to and including the last '/'.
                        String shortName = fileEntryName.replaceFirst("^.*[\\/]", "");
                        // Skip entries named "._*" or ".DS_Store*" (presumably
                        // macOS filesystem metadata rather than user files),
                        // and entries with an empty short name.
                        if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) {
                            // OK, this seems like an OK file entry - we'll try
                            // to read it and create a DataFile with it:
                            File unZippedTempFile = saveInputStreamInTempFile(unZippedIn, fileSizeLimit);
                            DataFile datafile = createSingleDataFile(version, unZippedTempFile, shortName, MIME_TYPE_UNDETERMINED_DEFAULT, systemConfig.getFileFixityChecksumAlgorithm(), false);
                            // Everything below dereferences the DataFile, so
                            // it must all sit behind the null check.
                            if (datafile != null) {
                                if (!fileEntryName.equals(shortName)) {
                                    // The entry name has a folder part: strip the file
                                    // name and any leading slashes, and collapse runs
                                    // of slashes into a single one.
                                    // NOTE(review): the character class [\\/] compiles
                                    // to the regex [\/], which matches '/' only, so
                                    // backslash separators are not normalized here.
                                    String directoryName = fileEntryName.replaceFirst("[\\/][\\/]*[^\\/]*$", "").replaceFirst("^[\\/]*", "").replaceAll("[\\/][\\/]*", "/");
                                    if (!"".equals(directoryName)) {
                                        logger.fine("setting the directory label to " + directoryName);
                                        datafile.getFileMetadata().setDirectoryLabel(directoryName);
                                    }
                                }
                                // We have created this datafile with the mime type "unknown";
                                // Now that we have it saved in a temporary location,
                                // let's try and determine its real type:
                                String tempFileName = getFilesTempDirectory() + "/" + datafile.getStorageIdentifier();
                                try {
                                    recognizedType = determineFileType(new File(tempFileName), shortName);
                                    logger.fine("File utility recognized unzipped file as " + recognizedType);
                                    if (recognizedType != null && !recognizedType.equals("")) {
                                        datafile.setContentType(recognizedType);
                                    }
                                } catch (Exception ex) {
                                    // Report the entry that failed, not the enclosing archive.
                                    logger.warning("Failed to run the file utility mime type check on file " + shortName);
                                }
                                datafiles.add(datafile);
                            }
                        }
                    }
                }
                unZippedIn.closeEntry();
            }
        } catch (IOException ioex) {
            // just clear the datafiles list and let
            // ingest default to creating a single DataFile out
            // of the unzipped file.
            logger.warning("Unzipping failed; rolling back to saving the file as is.");
            if (warningMessage == null) {
                warningMessage = "Failed to unzip the file. Saving the file as is.";
            }
            datafiles.clear();
        } catch (FileExceedsMaxSizeException femsx) {
            logger.warning("One of the unzipped files exceeds the size limit; resorting to saving the file as is. " + femsx.getMessage());
            warningMessage = femsx.getMessage() + "; saving the zip file as is, unzipped.";
            datafiles.clear();
        } finally {
            if (unZippedIn != null) {
                try {
                    unZippedIn.close();
                } catch (Exception ignored) {
                    // Nothing useful can be done about a failed close here.
                }
            }
        }
        if (datafiles.size() > 0) {
            // remove the uploaded zip file:
            try {
                Files.delete(tempFile);
            } catch (IOException ioex) {
                // do nothing - it's just a temp file.
                logger.warning("Could not remove temp file " + tempFile.getFileName().toString());
            }
            // and return:
            return datafiles;
        }
    } else if (finalType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)) {
        // Shape files may have to be split into multiple files,
        // one zip archive per each complete set of shape files:
        File rezipFolder = getShapefileUnzipTempDirectory();
        IngestServiceShapefileHelper shpIngestHelper = new IngestServiceShapefileHelper(tempFile.toFile(), rezipFolder);
        boolean didProcessWork = shpIngestHelper.processFile();
        if (!didProcessWork) {
            logger.severe("Processing of zipped shapefile failed.");
            return null;
        }
        try {
            for (File finalFile : shpIngestHelper.getFinalRezippedFiles()) {
                // try-with-resources closes the stream on every exit from the
                // loop body, including 'continue' and thrown exceptions.
                try (FileInputStream finalFileInputStream = new FileInputStream(finalFile)) {
                    String rezippedType = determineContentType(finalFile);
                    if (rezippedType == null) {
                        logger.warning("Content type is null; but should default to 'MIME_TYPE_UNDETERMINED_DEFAULT'");
                        continue;
                    }
                    File unZippedShapeTempFile = saveInputStreamInTempFile(finalFileInputStream, fileSizeLimit);
                    DataFile new_datafile = createSingleDataFile(version, unZippedShapeTempFile, finalFile.getName(), rezippedType, systemConfig.getFileFixityChecksumAlgorithm());
                    if (new_datafile != null) {
                        datafiles.add(new_datafile);
                    } else {
                        logger.severe("Could not add part of rezipped shapefile. new_datafile was null: " + finalFile.getName());
                    }
                }
            }
        } catch (FileExceedsMaxSizeException femsx) {
            logger.severe("One of the unzipped shape files exceeded the size limit; giving up. " + femsx.getMessage());
            datafiles.clear();
        }
        if (datafiles.size() > 0) {
            return datafiles;
        } else {
            logger.severe("No files added from directory of rezipped shapefiles");
        }
        return null;
    }
    // Finally, if none of the special cases above were applicable (or
    // if we were unable to unpack an uploaded file, etc.), we'll just
    // create and return a single DataFile:
    DataFile datafile = createSingleDataFile(version, tempFile.toFile(), fileName, finalType, systemConfig.getFileFixityChecksumAlgorithm());
    if (datafile != null) {
        if (warningMessage != null) {
            // Record why the file was saved as is instead of being unpacked.
            createIngestFailureReport(datafile, warningMessage);
            datafile.SetIngestProblem();
        }
        datafiles.add(datafile);
        return datafiles;
    }
    return null;
}
Also used : Path(java.nio.file.Path) IngestServiceShapefileHelper(edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) ZipInputStream(java.util.zip.ZipInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) ArrayList(java.util.ArrayList) Charset(java.nio.charset.Charset) FileExceedsMaxSizeException(edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException) IOException(java.io.IOException) FileExceedsMaxSizeException(edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException) XMLStreamException(javax.xml.stream.XMLStreamException) MissingResourceException(java.util.MissingResourceException) FileNotFoundException(java.io.FileNotFoundException) EJBException(javax.ejb.EJBException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) DataFile(edu.harvard.iq.dataverse.DataFile) GZIPInputStream(java.util.zip.GZIPInputStream) ZipInputStream(java.util.zip.ZipInputStream) DataFile(edu.harvard.iq.dataverse.DataFile) File(java.io.File)

Example 2 with FileExceedsMaxSizeException

use of edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException in project dataverse by IQSS.

the class FileUtil method saveInputStreamInTempFile.

// end createDataFiles
/**
 * Copies the supplied stream into a newly created file in the temporary
 * files directory and enforces the optional size limit on the result.
 *
 * <p>The temp file is removed again whenever this method fails, so a failed
 * call does not leave an orphaned file behind.
 *
 * @param inputStream   the content to save; a {@code null} stream is rejected
 * @param fileSizeLimit maximum allowed size in bytes; {@code null} means
 *                      "no size limit set", i.e. unlimited
 * @return the temp file holding the copied content
 * @throws IOException                 if the stream is {@code null}, or the
 *                                     temp file cannot be created or written
 * @throws FileExceedsMaxSizeException if the saved file is larger than
 *                                     {@code fileSizeLimit}
 */
private static File saveInputStreamInTempFile(InputStream inputStream, Long fileSizeLimit) throws IOException, FileExceedsMaxSizeException {
    // Reject a missing stream before touching the disk, so that no empty
    // temp file is created only to be abandoned.
    if (inputStream == null) {
        throw new IOException("Failed to save uploaded file.");
    }
    Path tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload");
    try {
        Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING);
    } catch (IOException | RuntimeException copyEx) {
        // The copy failed part-way: remove the partial file, keep the
        // original failure as the primary exception.
        try {
            Files.deleteIfExists(tempFile);
        } catch (IOException | SecurityException cleanupEx) {
            copyEx.addSuppressed(cleanupEx);
        }
        throw copyEx;
    }
    // size check:
    // (note that "no size limit set" = "unlimited")
    long fileSize = tempFile.toFile().length();
    if (fileSizeLimit != null && fileSize > fileSizeLimit) {
        try {
            tempFile.toFile().delete();
        } catch (Exception ignored) {
            // Best-effort cleanup only; the size-limit error below is what
            // the caller needs to see.
        }
        throw new FileExceedsMaxSizeException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), Long.toString(fileSize), fileSizeLimit.toString()));
    }
    return tempFile.toFile();
}
Also used : Path(java.nio.file.Path) FileExceedsMaxSizeException(edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException) IOException(java.io.IOException) FileExceedsMaxSizeException(edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException) XMLStreamException(javax.xml.stream.XMLStreamException) MissingResourceException(java.util.MissingResourceException) FileNotFoundException(java.io.FileNotFoundException) EJBException(javax.ejb.EJBException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) IOException(java.io.IOException)

Aggregations

FileExceedsMaxSizeException (edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException)2 FileNotFoundException (java.io.FileNotFoundException)2 IOException (java.io.IOException)2 Path (java.nio.file.Path)2 NoSuchAlgorithmException (java.security.NoSuchAlgorithmException)2 MissingResourceException (java.util.MissingResourceException)2 EJBException (javax.ejb.EJBException)2 XMLStreamException (javax.xml.stream.XMLStreamException)2 DataFile (edu.harvard.iq.dataverse.DataFile)1 IngestServiceShapefileHelper (edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper)1 BufferedInputStream (java.io.BufferedInputStream)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 InputStream (java.io.InputStream)1 Charset (java.nio.charset.Charset)1 ArrayList (java.util.ArrayList)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 ZipEntry (java.util.zip.ZipEntry)1 ZipInputStream (java.util.zip.ZipInputStream)1