Example usage of edu.harvard.iq.dataverse.DataFile in the IQSS/dataverse project:
class SolrIndexServiceBean, method reindexFilesInBatches.
/**
 * Rebuilds the Solr permission documents for the given files, producing one
 * Solr document per (file, dataset version) pair whose version state calls for
 * a search card, then persists the whole batch to Solr.
 *
 * @param filesToReindexPermissionsFor files whose permission docs need refreshing
 * @return a human-readable status message describing success or the caught failure
 */
private String reindexFilesInBatches(List<DataFile> filesToReindexPermissionsFor) {
    List<SolrInputDocument> docs = new ArrayList<>();
    // File ids grouped by owning dataset id. NOTE(review): this map is built but
    // never read in this method — presumably kept for parity with sibling
    // reindex methods; confirm before removing.
    Map<Long, List<Long>> byParentId = new HashMap<>();
    // Cache of permission strings per dataset version id, so permissions are
    // looked up at most once per version across all files in the batch.
    Map<Long, List<String>> permStringByDatasetVersion = new HashMap<>();
    for (DataFile file : filesToReindexPermissionsFor) {
        Dataset dataset = (Dataset) file.getOwner();
        Map<DatasetVersion.VersionState, Boolean> desiredCards = searchPermissionsService.getDesiredCards(dataset);
        for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataset)) {
            boolean cardShouldExist = desiredCards.get(datasetVersionFileIsAttachedTo.getVersionState());
            if (!cardShouldExist) {
                continue;
            }
            List<String> cachedPermission = permStringByDatasetVersion.get(datasetVersionFileIsAttachedTo.getId());
            if (cachedPermission == null) {
                logger.fine("no cached permission! Looking it up...");
                List<DvObjectSolrDoc> fileSolrDocs = constructDatafileSolrDocs((DataFile) file, permStringByDatasetVersion);
                for (DvObjectSolrDoc fileSolrDoc : fileSolrDocs) {
                    Long datasetVersionId = fileSolrDoc.getDatasetVersionId();
                    if (datasetVersionId != null) {
                        // Populate the cache as we go so later files attached to
                        // the same version take the cheap branch below.
                        permStringByDatasetVersion.put(datasetVersionId, fileSolrDoc.getPermissions());
                        docs.add(SearchUtil.createSolrDoc(fileSolrDoc));
                    }
                }
            } else {
                logger.fine("cached permission is " + cachedPermission);
                List<DvObjectSolrDoc> fileSolrDocsBasedOnCachedPermissions = constructDatafileSolrDocs((DataFile) file, permStringByDatasetVersion);
                for (DvObjectSolrDoc fileSolrDoc : fileSolrDocsBasedOnCachedPermissions) {
                    docs.add(SearchUtil.createSolrDoc(fileSolrDoc));
                }
            }
        }
        // Fix: the original only appended file ids for parents already present in
        // the map, so the FIRST file seen for each dataset was silently dropped
        // (a fresh empty list was stored without adding the id).
        byParentId.computeIfAbsent(file.getOwner().getId(), k -> new ArrayList<>()).add(file.getId());
    }
    try {
        persistToSolr(docs);
        return " " + filesToReindexPermissionsFor.size() + " files indexed across " + docs.size() + " Solr documents ";
    } catch (SolrServerException | IOException ex) {
        return " tried to reindex " + filesToReindexPermissionsFor.size() + " files indexed across " + docs.size() + " Solr documents but caught exception: " + ex;
    }
}
Example usage of edu.harvard.iq.dataverse.DataFile in the IQSS/dataverse project:
class FileUtil, method createSingleDataFile.
/**
 * Builds a new DataFile (with an attached FileMetadata) around the supplied
 * temporary file, optionally wiring it into the dataset behind the given
 * version, then moves the temp file into the files temp directory under a
 * freshly generated storage identifier and records its checksum.
 *
 * Returns null when tempFile is null or when the rename into place fails; a
 * checksum failure is logged but does not abort creation.
 */
private static DataFile createSingleDataFile(DatasetVersion version, File tempFile, String fileName, String contentType, DataFile.ChecksumType checksumType, boolean addToDataset) {
    if (tempFile == null) {
        return null;
    }
    DataFile dataFile = new DataFile(contentType);
    dataFile.setModificationTime(new Timestamp(new Date().getTime()));
    /**
     * @todo Think more about when permissions on files are modified. At create
     * time files have some sort of permissions, even if only *implied* ones —
     * e.g. ViewUnpublishedDataset at the dataset level.
     */
    dataFile.setPermissionModificationTime(new Timestamp(new Date().getTime()));
    FileMetadata fileMetadata = new FileMetadata();
    // TODO: add directoryLabel?
    fileMetadata.setLabel(fileName);
    if (addToDataset) {
        dataFile.setOwner(version.getDataset());
    }
    fileMetadata.setDataFile(dataFile);
    dataFile.getFileMetadatas().add(fileMetadata);
    if (addToDataset) {
        if (version.getFileMetadatas() == null) {
            version.setFileMetadatas(new ArrayList<>());
        }
        version.getFileMetadatas().add(fileMetadata);
        fileMetadata.setDatasetVersion(version);
        version.getDataset().getFiles().add(dataFile);
    }
    generateStorageIdentifier(dataFile);
    File destination = new File(getFilesTempDirectory() + "/" + dataFile.getStorageIdentifier());
    if (!tempFile.renameTo(destination)) {
        return null;
    }
    try {
        // We persist "SHA1" rather than "SHA-1".
        dataFile.setChecksumType(checksumType);
        dataFile.setChecksumValue(CalculateCheckSum(getFilesTempDirectory() + "/" + dataFile.getStorageIdentifier(), dataFile.getChecksumType()));
    } catch (Exception cksumEx) {
        logger.warning("Could not calculate " + checksumType + " signature for the new file " + fileName);
    }
    return dataFile;
}
Example usage of edu.harvard.iq.dataverse.DataFile in the IQSS/dataverse project:
class FileUtil, method createDataFiles.
/**
 * Saves an uploaded stream to a temp file, determines its content type, and
 * turns it into one or more DataFiles attached to the given dataset version.
 * Special cases: gzipped FITS files are uncompressed first; zip archives are
 * unpacked into multiple DataFiles; zipped shapefiles are re-zipped into one
 * archive per complete shape-file set. In every other case (or when unpacking
 * fails) a single DataFile is created from the stream as-is.
 *
 * @param version dataset version the new file(s) will be attached to
 * @param inputStream the uploaded bytes (caller retains ownership/closing)
 * @param fileName the name the file was uploaded under
 * @param suppliedContentType mime type supplied by the browser/harvest source; may be overridden
 * @param systemConfig source of size limits, zip entry limits, and checksum algorithm
 * @return the list of created DataFiles, or null on unrecoverable failure
 * @throws IOException if the temp directory is unconfigured, the size limit is exceeded, or copying fails
 */
public static List<DataFile> createDataFiles(DatasetVersion version, InputStream inputStream, String fileName, String suppliedContentType, SystemConfig systemConfig) throws IOException {
List<DataFile> datafiles = new ArrayList<>();
// Set when unpacking fails; attached to the fallback single DataFile as an ingest report.
String warningMessage = null;
// save the file, in the temporary location for now:
Path tempFile = null;
Long fileSizeLimit = systemConfig.getMaxFileUploadSize();
if (getFilesTempDirectory() != null) {
tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload");
// "temporary" location is the key here; this is why we are not using
// the DataStore framework for this - the assumption is that
// temp files will always be stored on the local filesystem.
// -- L.A. Jul. 2014
logger.fine("Will attempt to save the file as: " + tempFile.toString());
Files.copy(inputStream, tempFile, StandardCopyOption.REPLACE_EXISTING);
// A file size check, before we do anything else:
// (note that "no size limit set" = "unlimited")
// (also note, that if this is a zip file, we'll be checking
// the size limit for each of the individual unpacked files)
Long fileSize = tempFile.toFile().length();
if (fileSizeLimit != null && fileSize > fileSizeLimit) {
try {
tempFile.toFile().delete();
// best-effort cleanup; deletion failure is non-fatal here
} catch (Exception ex) {
}
throw new IOException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), fileSize.toString(), fileSizeLimit.toString()));
}
} else {
throw new IOException("Temp directory is not configured.");
}
logger.fine("mime type supplied: " + suppliedContentType);
// Let's try our own utilities (Jhove, etc.) to determine the file type
// of the uploaded file. (We may already have a mime type supplied for this
// file - maybe the type that the browser recognized on upload; or, if
// it's a harvest, maybe the remote server has already given us the type
// for this file... with our own type utility we may or may not do better
// than the type supplied:
// -- L.A.
String recognizedType = null;
String finalType = null;
try {
recognizedType = determineFileType(tempFile.toFile(), fileName);
logger.fine("File utility recognized the file as " + recognizedType);
if (recognizedType != null && !recognizedType.equals("")) {
// Prefer the locally recognized type when the supplied type is missing/
// undetermined, or when the recognized type unlocks special handling
// (tabular ingest, fits-gzipped, shapefile, zip).
if (suppliedContentType == null || suppliedContentType.equals("") || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_DEFAULT) || suppliedContentType.equalsIgnoreCase(MIME_TYPE_UNDETERMINED_BINARY) || (ingestableAsTabular(suppliedContentType) && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV) && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_CSV_ALT) && !suppliedContentType.equalsIgnoreCase(MIME_TYPE_XLSX)) || ingestableAsTabular(recognizedType) || recognizedType.equals("application/fits-gzipped") || recognizedType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE) || recognizedType.equals(MIME_TYPE_ZIP)) {
finalType = recognizedType;
}
}
} catch (Exception ex) {
logger.warning("Failed to run the file utility mime type check on file " + fileName);
}
if (finalType == null) {
finalType = (suppliedContentType == null || suppliedContentType.equals("")) ? MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType;
}
// Special case 1: gzipped FITS file - uncompress and ingest the payload.
if (finalType.equals("application/fits-gzipped")) {
InputStream uncompressedIn = null;
String finalFileName = fileName;
// since we are going to uncompress it:
if (fileName != null && fileName.matches(".*\\.gz$")) {
finalFileName = fileName.replaceAll("\\.gz$", "");
}
DataFile datafile = null;
try {
uncompressedIn = new GZIPInputStream(new FileInputStream(tempFile.toFile()));
File unZippedTempFile = saveInputStreamInTempFile(uncompressedIn, fileSizeLimit);
datafile = createSingleDataFile(version, unZippedTempFile, finalFileName, MIME_TYPE_UNDETERMINED_DEFAULT, systemConfig.getFileFixityChecksumAlgorithm());
} catch (IOException | FileExceedsMaxSizeException ioex) {
// on failure, fall through to the single-DataFile path at the bottom
datafile = null;
} finally {
if (uncompressedIn != null) {
try {
uncompressedIn.close();
} catch (IOException e) {
}
}
}
// down, from the original, uncompressed file.
if (datafile != null) {
// remove the compressed temp file:
try {
tempFile.toFile().delete();
} catch (SecurityException ex) {
// (this is very non-fatal)
logger.warning("Failed to delete temporary file " + tempFile.toString());
}
datafiles.add(datafile);
return datafiles;
}
// If it's a ZIP file, we are going to unpack it and create multiple
// DataFile objects from its contents:
} else if (finalType.equals("application/zip")) {
ZipInputStream unZippedIn = null;
ZipEntry zipEntry = null;
int fileNumberLimit = systemConfig.getZipUploadFilesLimit();
try {
// NOTE(review): charset is always null here, so the charset-aware branch
// below is dead code — looks like a placeholder for a configurable zip
// entry-name charset; confirm against upstream before removing.
Charset charset = null;
if (charset != null) {
unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile()), charset);
} else {
unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile()));
}
while (true) {
try {
zipEntry = unZippedIn.getNextEntry();
} catch (IllegalArgumentException iaex) {
// Note:
// ZipInputStream documentation doesn't even mention that
// getNextEntry() throws an IllegalArgumentException!
// but that's what happens if the file name of the next
// entry is not valid in the current CharSet.
// -- L.A.
warningMessage = "Failed to unpack Zip file. (Unknown Character Set used in a file name?) Saving the file as is.";
logger.warning(warningMessage);
throw new IOException();
}
if (zipEntry == null) {
break;
}
if (!zipEntry.isDirectory()) {
// NOTE(review): `>` (not `>=`) means fileNumberLimit + 1 files can be
// accumulated before this trips — possible off-by-one; confirm intent.
if (datafiles.size() > fileNumberLimit) {
logger.warning("Zip upload - too many files.");
warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit + "); please upload a zip archive with fewer files, if you want them to be ingested " + "as individual DataFiles.";
throw new IOException();
}
String fileEntryName = zipEntry.getName();
logger.fine("ZipEntry, file: " + fileEntryName);
if (fileEntryName != null && !fileEntryName.equals("")) {
String shortName = fileEntryName.replaceFirst("^.*[\\/]", "");
// start with "._")
// skip macOS resource-fork and .DS_Store entries
if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) {
// OK, this seems like an OK file entry - we'll try
// to read it and create a DataFile with it:
File unZippedTempFile = saveInputStreamInTempFile(unZippedIn, fileSizeLimit);
DataFile datafile = createSingleDataFile(version, unZippedTempFile, shortName, MIME_TYPE_UNDETERMINED_DEFAULT, systemConfig.getFileFixityChecksumAlgorithm(), false);
if (!fileEntryName.equals(shortName)) {
// If the filename looks like a hierarchical folder name (i.e., contains slashes and backslashes),
// we'll extract the directory name, then a) strip the leading and trailing slashes;
// and b) replace all the back slashes with regular ones and b) replace any multiple
// slashes with a single slash:
String directoryName = fileEntryName.replaceFirst("[\\/][\\/]*[^\\/]*$", "").replaceFirst("^[\\/]*", "").replaceAll("[\\/][\\/]*", "/");
if (!"".equals(directoryName)) {
logger.fine("setting the directory label to " + directoryName);
// NOTE(review): datafile is dereferenced here, but its null check
// only happens below — if createSingleDataFile returned null this
// line would NPE first; confirm whether the check should move up.
datafile.getFileMetadata().setDirectoryLabel(directoryName);
}
}
if (datafile != null) {
// We have created this datafile with the mime type "unknown";
// Now that we have it saved in a temporary location,
// let's try and determine its real type:
String tempFileName = getFilesTempDirectory() + "/" + datafile.getStorageIdentifier();
try {
recognizedType = determineFileType(new File(tempFileName), shortName);
logger.fine("File utility recognized unzipped file as " + recognizedType);
if (recognizedType != null && !recognizedType.equals("")) {
datafile.setContentType(recognizedType);
}
} catch (Exception ex) {
logger.warning("Failed to run the file utility mime type check on file " + fileName);
}
datafiles.add(datafile);
}
}
}
}
unZippedIn.closeEntry();
}
} catch (IOException ioex) {
// just clear the datafiles list and let
// ingest default to creating a single DataFile out
// of the unzipped file.
logger.warning("Unzipping failed; rolling back to saving the file as is.");
if (warningMessage == null) {
warningMessage = "Failed to unzip the file. Saving the file as is.";
}
datafiles.clear();
} catch (FileExceedsMaxSizeException femsx) {
logger.warning("One of the unzipped files exceeds the size limit; resorting to saving the file as is. " + femsx.getMessage());
warningMessage = femsx.getMessage() + "; saving the zip file as is, unzipped.";
datafiles.clear();
} finally {
if (unZippedIn != null) {
try {
unZippedIn.close();
} catch (Exception zEx) {
}
}
}
if (datafiles.size() > 0) {
// remove the uploaded zip file:
try {
Files.delete(tempFile);
} catch (IOException ioex) {
// do nothing - it's just a temp file.
logger.warning("Could not remove temp file " + tempFile.getFileName().toString());
}
// and return:
return datafiles;
}
// Special case 3: zipped shapefile - split into one zip per shape-file set.
} else if (finalType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)) {
// Shape files may have to be split into multiple files,
// one zip archive per each complete set of shape files:
// File rezipFolder = new File(this.getFilesTempDirectory());
File rezipFolder = getShapefileUnzipTempDirectory();
IngestServiceShapefileHelper shpIngestHelper;
shpIngestHelper = new IngestServiceShapefileHelper(tempFile.toFile(), rezipFolder);
boolean didProcessWork = shpIngestHelper.processFile();
if (!(didProcessWork)) {
logger.severe("Processing of zipped shapefile failed.");
return null;
}
try {
for (File finalFile : shpIngestHelper.getFinalRezippedFiles()) {
FileInputStream finalFileInputStream = new FileInputStream(finalFile);
finalType = determineContentType(finalFile);
if (finalType == null) {
logger.warning("Content type is null; but should default to 'MIME_TYPE_UNDETERMINED_DEFAULT'");
continue;
}
File unZippedShapeTempFile = saveInputStreamInTempFile(finalFileInputStream, fileSizeLimit);
DataFile new_datafile = createSingleDataFile(version, unZippedShapeTempFile, finalFile.getName(), finalType, systemConfig.getFileFixityChecksumAlgorithm());
if (new_datafile != null) {
datafiles.add(new_datafile);
} else {
logger.severe("Could not add part of rezipped shapefile. new_datafile was null: " + finalFile.getName());
}
finalFileInputStream.close();
}
} catch (FileExceedsMaxSizeException femsx) {
logger.severe("One of the unzipped shape files exceeded the size limit; giving up. " + femsx.getMessage());
datafiles.clear();
}
if (datafiles.size() > 0) {
return datafiles;
} else {
logger.severe("No files added from directory of rezipped shapefiles");
}
return null;
}
// Finally, if none of the special cases above were applicable (or
// if we were unable to unpack an uploaded file, etc.), we'll just
// create and return a single DataFile:
DataFile datafile = createSingleDataFile(version, tempFile.toFile(), fileName, finalType, systemConfig.getFileFixityChecksumAlgorithm());
// NOTE(review): tempFile.toFile() never returns null, so the second
// condition here is always true.
if (datafile != null && tempFile.toFile() != null) {
if (warningMessage != null) {
createIngestFailureReport(datafile, warningMessage);
datafile.SetIngestProblem();
}
datafiles.add(datafile);
return datafiles;
}
return null;
}
Example usage of edu.harvard.iq.dataverse.DataFile in the IQSS/dataverse project:
class DataFileZipper, method addFileToZipStream.
/**
 * Streams the content of the given DataFile into the shared zip output stream
 * as a single zip entry, optionally recording a line in the file manifest.
 *
 * @param dataFile the file to add to the zip bundle
 * @return the number of bytes written for this entry (0 if no storage object
 *         could be obtained, or the file's input stream was unavailable)
 * @throws IOException if reading the file or writing the zip entry fails
 */
public long addFileToZipStream(DataFile dataFile) throws IOException {
    if (zipOutputStream == null) {
        openZipStream();
    }
    boolean createManifest = fileManifest != null;
    DataAccessRequest daReq = new DataAccessRequest();
    StorageIO<DataFile> accessObject = DataAccess.getStorageIO(dataFile, daReq);
    if (accessObject == null) {
        return 0L;
    }
    accessObject.open();
    long byteSize = 0;
    String fileName = accessObject.getFileName();
    String mimeType = accessObject.getMimeType();
    if (mimeType == null || mimeType.equals("")) {
        mimeType = "application/octet-stream";
    }
    InputStream instream = accessObject.getInputStream();
    if (instream == null) {
        // No readable stream; just note the failure in the manifest, if one is kept.
        if (createManifest) {
            addToManifest(fileName + " (" + mimeType + ") COULD NOT be downloaded because an I/O error has occured. \r\n");
        }
        return 0L;
    }
    String zipEntryName = checkZipEntryName(fileName);
    try {
        ZipEntry e = new ZipEntry(zipEntryName);
        logger.fine("created new zip entry for " + zipEntryName);
        // support for categories: (not yet implemented)
        zipOutputStream.putNextEntry(e);
        // before writing out any bytes from the input stream, flush
        // any extra content, such as the variable header for the
        // subsettable files:
        String varHeaderLine = accessObject.getVarHeader();
        if (varHeaderLine != null) {
            zipOutputStream.write(varHeaderLine.getBytes());
            byteSize += (varHeaderLine.getBytes().length);
        }
        byte[] data = new byte[8192];
        int i;
        while ((i = instream.read(data)) > 0) {
            zipOutputStream.write(data, 0, i);
            logger.fine("wrote " + i + " bytes;");
            byteSize += i;
            zipOutputStream.flush();
        }
    } finally {
        // Fix: the original leaked instream if writing the entry threw; close it
        // on every path. (Also removed the unused boxed `Boolean Success` local.)
        instream.close();
    }
    zipOutputStream.closeEntry();
    logger.fine("closed zip entry for " + zipEntryName);
    if (createManifest) {
        addToManifest(zipEntryName + " (" + mimeType + ") " + byteSize + " bytes.\r\n");
    }
    if (byteSize > 0) {
        zippedFilesList.add(dataFile.getId());
    }
    return byteSize;
}
Example usage of edu.harvard.iq.dataverse.DataFile in the IQSS/dataverse project:
class ImageThumbConverter, method generatePDFThumbnail.
/**
 * Generates a thumbnail of the given size for a stored PDF via ImageMagick.
 *
 * If the storage driver exposes a local filesystem path, ImageMagick runs on
 * it directly; otherwise the PDF is copied to a temp file first and the
 * generated thumbnail is stored back through the driver as an AUX file.
 *
 * @param storageIO storage handle for the PDF DataFile
 * @param size thumbnail edge size in pixels (also used in the AUX-file suffix)
 * @return true if a thumbnail was generated (and, when required, saved as AUX)
 */
private static boolean generatePDFThumbnail(StorageIO<DataFile> storageIO, int size) {
    if (isPdfFileOverSizeLimit(storageIO.getDataFile().getFilesize())) {
        logger.fine("Image file too large (" + storageIO.getDataFile().getFilesize() + " bytes) - skipping");
        return false;
    }
    // No ImageMagick, no thumbnail - better give up right away:
    if (!isImageMagickInstalled()) {
        return false;
    }
    File sourcePdfFile = null;
    // We'll try to get a local Path for this file - but if that is not available
    // (i.e., if it's a file that's stored by a driver that does not provide
    // direct file access - e.g., swift), we'll save this PDF in a temp file,
    // will run ImageMagick on it, and will save its output as an "auxiliary"
    // file via the driver.
    boolean tempFilesRequired = false;
    try {
        Path pdfFilePath = storageIO.getFileSystemPath();
        sourcePdfFile = pdfFilePath.toFile();
        logger.fine("Opened the source pdf file as a local File.");
    } catch (UnsupportedDataAccessOperationException uoex) {
        // this means there is no direct filesystem path for this object;
        logger.fine("Could not open source pdf file as a local file - will go the temp file route.");
        tempFilesRequired = true;
    } catch (IOException ioex) {
        // this on the other hand is likely a fatal condition :(
        return false;
    }
    if (tempFilesRequired) {
        ReadableByteChannel pdfFileChannel;
        try {
            storageIO.open();
            pdfFileChannel = storageIO.getReadChannel();
        } catch (Exception ioex) {
            logger.warning("caught Exception trying to open an input stream for " + storageIO.getDataFile().getStorageIdentifier());
            return false;
        }
        File tempFile;
        try {
            tempFile = File.createTempFile("tempFileToRescale", ".tmp");
            // Fix: the original leaked the FileOutputStream (and its channel);
            // try-with-resources closes it after the transfer completes.
            try (FileOutputStream tempFileOutput = new FileOutputStream(tempFile)) {
                tempFileOutput.getChannel().transferFrom(pdfFileChannel, 0, storageIO.getSize());
            }
        } catch (IOException ioex) {
            logger.warning("GenerateImageThumb: failed to save pdf bytes in a temporary file.");
            return false;
        }
        sourcePdfFile = tempFile;
    }
    String imageThumbFileName = generatePDFThumbnailFromFile(sourcePdfFile.getAbsolutePath(), size);
    if (imageThumbFileName == null) {
        return false;
    }
    // If we worked from a temp copy, save the generated thumbnail back through
    // the storage driver as an AUX file:
    if (tempFilesRequired) {
        try {
            logger.fine("attempting to save generated pdf thumbnail, as AUX file " + THUMBNAIL_SUFFIX + size);
            storageIO.savePathAsAux(Paths.get(imageThumbFileName), THUMBNAIL_SUFFIX + size);
        } catch (IOException ioex) {
            logger.warning("failed to save generated pdf thumbnail, as AUX file " + THUMBNAIL_SUFFIX + size + "!");
            return false;
        }
    }
    return true;
}
Aggregations