Search in sources :

Example 1 with IngestServiceShapefileHelper

Use of edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper in the project dataverse by IQSS.

The method createDataFiles of the class FileUtil.

/**
 * Saves an uploaded stream into a temporary file and creates one or more
 * {@link DataFile} objects from it.
 *
 * <p>The upload is first copied into the configured temp directory and checked
 * against the maximum upload size. Its content type is then determined and,
 * depending on that type, the file is handled in one of four ways:
 * <ul>
 *   <li>{@code application/fits-gzipped}: the file is uncompressed and a single
 *       DataFile is created from the uncompressed content;</li>
 *   <li>{@code application/zip}: every usable entry is unpacked into its own
 *       DataFile (the folder part of the entry name becomes the directory label);</li>
 *   <li>shapefile archive: the archive is re-zipped by
 *       {@code IngestServiceShapefileHelper} and one DataFile is created per
 *       re-zipped file;</li>
 *   <li>anything else, or when unpacking/uncompressing failed: a single DataFile
 *       is created from the upload as is. If unpacking failed, the reason is
 *       attached to that DataFile as an ingest failure report.</li>
 * </ul>
 *
 * @param version             dataset version the new files are created for
 * @param inputStream         content of the uploaded file
 * @param fileName            name of the uploaded file
 * @param suppliedContentType mime type supplied with the upload; may be null or empty
 * @param systemConfig        source of the size limit, zip file-count limit and
 *                            checksum algorithm
 * @return the created DataFiles, or {@code null} when shapefile processing
 *         failed or no DataFile could be created
 * @throws IOException if the temp directory is not configured, the upload
 *                     exceeds the size limit, or the temp file cannot be
 *                     written or read
 */
public static List<DataFile> createDataFiles(DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, SystemConfig systemConfig) throws IOException {
    List<DataFile> datafiles = new ArrayList<>();
    String warningMessage = null;
    Long fileSizeLimit = systemConfig.getMaxFileUploadSize();
    if (getFilesTempDirectory() == null) {
        throw new IOException("Temp directory is not configured.");
    }
    // save the file, in the temporary location for now:
    // "temporary" location is the key here; this is why we are not using
    // the DataStore framework for this - the assumption is that
    // temp files will always be stored on the local filesystem.
    // -- L.A. Jul. 2014
    Path tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload");
    logger.fine("Will attempt to save the file as: " + tempFile.toString());
    Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING);
    // A file size check, before we do anything else:
    // (note that "no size limit set" = "unlimited")
    // (also note, that if this is a zip file, we'll be checking
    // the size limit for each of the individual unpacked files)
    long fileSize = tempFile.toFile().length();
    if (fileSizeLimit != null && fileSize > fileSizeLimit) {
        try {
            tempFile.toFile().delete();
        } catch (Exception ignored) {
            // best effort only: the size-limit error below is what the caller needs to see.
        }
        throw new IOException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), Long.toString(fileSize), fileSizeLimit.toString()));
    }
    logger.fine("mime type supplied: " + suppliedContentType);
    // Let's try our own utilities (Jhove, etc.) to determine the file type
    // of the uploaded file. (We may already have a mime type supplied for this
    // file - maybe the type that the browser recognized on upload; or, if
    // it's a harvest, maybe the remote server has already given us the type
    // for this file... with our own type utility we may or may not do better
    // than the type supplied:
    // -- L.A.
    String recognizedType = null;
    String finalType = null;
    try {
        recognizedType = determineFileType(tempFile.toFile(), fileName);
        logger.fine("File utility recognized the file as " + recognizedType);
        if (recognizedType != null && !recognizedType.equals("")) {
            // The recognized type wins when the supplied type is missing or
            // generic, or when either type is one that gets special handling.
            if (suppliedContentType == null
                    || suppliedContentType.equals("")
                    || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_DEFAULT)
                    || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_BINARY)
                    || (ingestableAsTabular(suppliedContentType)
                        && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV)
                        && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV_ALT)
                        && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_XLSX))
                    || ingestableAsTabular(recognizedType)
                    || recognizedType.equals("application/fits-gzipped")
                    || recognizedType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)
                    || recognizedType.equals(MIME_TYPE_ZIP)) {
                finalType = recognizedType;
            }
        }
    } catch (Exception ex) {
        logger.warning("Failed to run the file utility mime type check on file " + fileName);
    }
    if (finalType == null) {
        finalType = (suppliedContentType == null || suppliedContentType.equals("")) ? MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType;
    }
    if (finalType.equals("application/fits-gzipped")) {
        String finalFileName = fileName;
        // drop the ".gz" extension, since we are going to uncompress it:
        if (fileName != null && fileName.matches(".*\\.gz$")) {
            finalFileName = fileName.replaceAll("\\.gz$", "");
        }
        DataFile datafile = null;
        // Both streams are declared as resources so that the underlying file
        // stream is closed even if the GZIP header cannot be read.
        try (InputStream compressedIn = new FileInputStream(tempFile.toFile());
                InputStream uncompressedIn = new GZIPInputStream(compressedIn)) {
            File unZippedTempFile = saveInputStreamInTempFile(uncompressedIn, fileSizeLimit);
            datafile = createSingleDataFile(version, unZippedTempFile, finalFileName, MIME_TYPE_UNDETERMINED_DEFAULT, systemConfig.getFileFixityChecksumAlgorithm());
        } catch (IOException | FileExceedsMaxSizeException ioex) {
            // datafile is deliberately left untouched here: it is still null if
            // uncompressing or creating the DataFile failed, and it is kept if
            // only closing the streams failed afterwards.
            logger.warning("Failed to uncompress gzipped file " + fileName + ": " + ioex.getMessage());
        }
        // If uncompressing failed, datafile is still null and we fall through
        // to the bottom of the method, where a single DataFile is created
        // from the original, still compressed file.
        if (datafile != null) {
            // remove the compressed temp file:
            try {
                tempFile.toFile().delete();
            } catch (SecurityException ex) {
                // (this is very non-fatal)
                logger.warning("Failed to delete temporary file " + tempFile.toString());
            }
            datafiles.add(datafile);
            return datafiles;
        }
    // If it's a ZIP file, we are going to unpack it and create multiple
    // DataFile objects from its contents:
    } else if (finalType.equals("application/zip")) {
        ZipInputStream unZippedIn = null;
        int fileNumberLimit = systemConfig.getZipUploadFilesLimit();
        try {
            unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile()));
            while (true) {
                ZipEntry zipEntry;
                try {
                    zipEntry = unZippedIn.getNextEntry();
                } catch (IllegalArgumentException iaex) {
                    // Note:
                    // ZipInputStream documentation doesn't even mention that
                    // getNextEntry() throws an IllegalArgumentException!
                    // but that's what happens if the file name of the next
                    // entry is not valid in the current CharSet.
                    // -- L.A.
                    warningMessage = "Failed to unpack Zip file. (Unknown Character Set used in a file name?) Saving the file as is.";
                    logger.warning(warningMessage);
                    throw new IOException();
                }
                if (zipEntry == null) {
                    break;
                }
                if (!zipEntry.isDirectory()) {
                    String fileEntryName = zipEntry.getName();
                    logger.fine("ZipEntry, file: " + fileEntryName);
                    if (fileEntryName != null && !fileEntryName.equals("")) {
                        String shortName = fileEntryName.replaceFirst("^.*[\\/]", "");
                        // Skip entries whose name is empty or starts with "._" or
                        // ".DS_Store" (presumably OS-generated metadata files).
                        if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) {
                            // Enforce the limit only when we are about to unpack one
                            // more file, so that exactly fileNumberLimit files are
                            // allowed and skipped entries never trigger the rollback.
                            if (datafiles.size() >= fileNumberLimit) {
                                logger.warning("Zip upload - too many files.");
                                warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit + "); please upload a zip archive with fewer files, if you want them to be ingested " + "as individual DataFiles.";
                                throw new IOException();
                            }
                            // OK, this seems like an OK file entry - we'll try
                            // to read it and create a DataFile with it:
                            File unZippedTempFile = saveInputStreamInTempFile(unZippedIn, fileSizeLimit);
                            DataFile datafile = createSingleDataFile(version, unZippedTempFile, shortName, MIME_TYPE_UNDETERMINED_DEFAULT, systemConfig.getFileFixityChecksumAlgorithm(), false);
                            if (datafile != null) {
                                if (!fileEntryName.equals(shortName)) {
                                    // If the filename looks like a hierarchical folder name (i.e., contains slashes and backslashes),
                                    // we'll extract the directory name, then a) strip the leading and trailing slashes;
                                    // b) replace all the back slashes with regular ones and c) replace any multiple
                                    // slashes with a single slash:
                                    String directoryName = fileEntryName.replaceFirst("[\\/][\\/]*[^\\/]*$", "").replaceFirst("^[\\/]*", "").replaceAll("[\\/][\\/]*", "/");
                                    if (!"".equals(directoryName)) {
                                        logger.fine("setting the directory label to " + directoryName);
                                        datafile.getFileMetadata().setDirectoryLabel(directoryName);
                                    }
                                }
                                // We have created this datafile with the mime type "unknown";
                                // Now that we have it saved in a temporary location,
                                // let's try and determine its real type:
                                String tempFileName = getFilesTempDirectory() + "/" + datafile.getStorageIdentifier();
                                try {
                                    recognizedType = determineFileType(new File(tempFileName), shortName);
                                    logger.fine("File utility recognized unzipped file as " + recognizedType);
                                    if (recognizedType != null && !recognizedType.equals("")) {
                                        datafile.setContentType(recognizedType);
                                    }
                                } catch (Exception ex) {
                                    logger.warning("Failed to run the file utility mime type check on file " + shortName);
                                }
                                datafiles.add(datafile);
                            }
                        }
                    }
                }
                unZippedIn.closeEntry();
            }
        } catch (IOException ioex) {
            // just clear the datafiles list and let
            // ingest default to creating a single DataFile out
            // of the unzipped file.
            logger.warning("Unzipping failed; rolling back to saving the file as is.");
            if (warningMessage == null) {
                warningMessage = "Failed to unzip the file. Saving the file as is.";
            }
            datafiles.clear();
        } catch (FileExceedsMaxSizeException femsx) {
            logger.warning("One of the unzipped files exceeds the size limit; resorting to saving the file as is. " + femsx.getMessage());
            warningMessage = femsx.getMessage() + "; saving the zip file as is, unzipped.";
            datafiles.clear();
        } finally {
            // Closed by hand rather than with try-with-resources: a failure to
            // close must not be routed into the IOException rollback above.
            if (unZippedIn != null) {
                try {
                    unZippedIn.close();
                } catch (Exception ignored) {
                    // nothing useful can be done if closing the stream fails.
                }
            }
        }
        if (datafiles.size() > 0) {
            // remove the uploaded zip file:
            try {
                Files.delete(tempFile);
            } catch (IOException ioex) {
                // do nothing - it's just a temp file.
                logger.warning("Could not remove temp file " + tempFile.getFileName().toString());
            }
            // and return:
            return datafiles;
        }
    } else if (finalType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)) {
        // Shape files may have to be split into multiple files,
        // one zip archive per each complete set of shape files:
        File rezipFolder = getShapefileUnzipTempDirectory();
        IngestServiceShapefileHelper shpIngestHelper = new IngestServiceShapefileHelper(tempFile.toFile(), rezipFolder);
        boolean didProcessWork = shpIngestHelper.processFile();
        if (!didProcessWork) {
            logger.severe("Processing of zipped shapefile failed.");
            return null;
        }
        try {
            for (File finalFile : shpIngestHelper.getFinalRezippedFiles()) {
                // try-with-resources closes the stream on every path out of the
                // loop body, including the "continue" and any exception.
                try (FileInputStream finalFileInputStream = new FileInputStream(finalFile)) {
                    String rezippedType = determineContentType(finalFile);
                    if (rezippedType == null) {
                        logger.warning("Content type is null; but should default to 'MIME_TYPE_UNDETERMINED_DEFAULT'");
                        continue;
                    }
                    File unZippedShapeTempFile = saveInputStreamInTempFile(finalFileInputStream, fileSizeLimit);
                    DataFile new_datafile = createSingleDataFile(version, unZippedShapeTempFile, finalFile.getName(), rezippedType, systemConfig.getFileFixityChecksumAlgorithm());
                    if (new_datafile != null) {
                        datafiles.add(new_datafile);
                    } else {
                        logger.severe("Could not add part of rezipped shapefile. new_datafile was null: " + finalFile.getName());
                    }
                }
            }
        } catch (FileExceedsMaxSizeException femsx) {
            logger.severe("One of the unzipped shape files exceeded the size limit; giving up. " + femsx.getMessage());
            datafiles.clear();
        }
        if (datafiles.size() > 0) {
            return datafiles;
        }
        logger.severe("No files added from directory of rezipped shapefiles");
        return null;
    }
    // Finally, if none of the special cases above were applicable (or
    // if we were unable to unpack an uploaded file, etc.), we'll just
    // create and return a single DataFile:
    DataFile datafile = createSingleDataFile(version, tempFile.toFile(), fileName, finalType, systemConfig.getFileFixityChecksumAlgorithm());
    if (datafile != null) {
        if (warningMessage != null) {
            // record why the archive could not be unpacked:
            createIngestFailureReport(datafile, warningMessage);
            datafile.SetIngestProblem();
        }
        datafiles.add(datafile);
        return datafiles;
    }
    return null;
}
Also used : Path(java.nio.file.Path) IngestServiceShapefileHelper(edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) ZipInputStream(java.util.zip.ZipInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) ArrayList(java.util.ArrayList) Charset(java.nio.charset.Charset) FileExceedsMaxSizeException(edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException) IOException(java.io.IOException) FileExceedsMaxSizeException(edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException) XMLStreamException(javax.xml.stream.XMLStreamException) MissingResourceException(java.util.MissingResourceException) FileNotFoundException(java.io.FileNotFoundException) EJBException(javax.ejb.EJBException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) DataFile(edu.harvard.iq.dataverse.DataFile) GZIPInputStream(java.util.zip.GZIPInputStream) ZipInputStream(java.util.zip.ZipInputStream) DataFile(edu.harvard.iq.dataverse.DataFile) File(java.io.File)

Aggregations

DataFile (edu.harvard.iq.dataverse.DataFile)1 FileExceedsMaxSizeException (edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException)1 IngestServiceShapefileHelper (edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper)1 BufferedInputStream (java.io.BufferedInputStream)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 Charset (java.nio.charset.Charset)1 Path (java.nio.file.Path)1 NoSuchAlgorithmException (java.security.NoSuchAlgorithmException)1 ArrayList (java.util.ArrayList)1 MissingResourceException (java.util.MissingResourceException)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 ZipEntry (java.util.zip.ZipEntry)1 ZipInputStream (java.util.zip.ZipInputStream)1 EJBException (javax.ejb.EJBException)1 XMLStreamException (javax.xml.stream.XMLStreamException)1