Use of edu.harvard.iq.dataverse.FileMetadata in project dataverse by IQSS.
The class DatasetUtil, method attemptToAutomaticallySelectThumbnailFromDataFiles.
/**
* Pass an optional datasetVersion in case the file system is checked
*
* @param dataset the dataset to select a thumbnail candidate for
* @param datasetVersion the version whose files are checked; if null, the latest version of the dataset is used
* @return the first DataFile that supports thumbnails and has one available at card-image size, or null if none is found
*/
public static DataFile attemptToAutomaticallySelectThumbnailFromDataFiles(Dataset dataset, DatasetVersion datasetVersion) {
if (dataset == null) {
return null;
}
if (dataset.isUseGenericThumbnail()) {
logger.fine("Bypassing logic to find a thumbnail because a generic icon for the dataset is desired.");
return null;
}
if (datasetVersion == null) {
logger.fine("getting latest version of dataset");
datasetVersion = dataset.getLatestVersion();
}
for (FileMetadata fmd : datasetVersion.getFileMetadatas()) {
DataFile testFile = fmd.getDataFile();
if (FileUtil.isThumbnailSupported(testFile) && ImageThumbConverter.isThumbnailAvailable(testFile, ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE)) {
return testFile;
}
}
logger.fine("In attemptToAutomaticallySelectThumbnailFromDataFiles and interated through all the files but couldn't find a thumbnail.");
return null;
}
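For context, here is a hedged usage sketch (not taken from the Dataverse source) of how a caller might use this selection logic; the dataset variable and logger are assumed to exist in the calling class.
// Usage sketch: ask for an automatically selected thumbnail, falling back to the
// generic dataset icon when nothing suitable is found. Passing null for the
// version makes the method use dataset.getLatestVersion().
DataFile candidate = DatasetUtil.attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null);
if (candidate != null) {
    logger.fine("Using DataFile " + candidate.getId() + " as the dataset card image.");
} else {
    logger.fine("No thumbnail candidate found; the generic dataset icon will be used.");
}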
Use of edu.harvard.iq.dataverse.FileMetadata in project dataverse by IQSS.
The class IngestServiceBean, method addFiles.
// This method tries to permanently store the files on the filesystem.
// It should be called before we attempt to permanently save the files in
// the database by calling the Save command on the dataset and/or version.
// TODO: rename the method finalizeFiles()? or something like that?
public void addFiles(DatasetVersion version, List<DataFile> newFiles) {
if (newFiles != null && newFiles.size() > 0) {
// final check for duplicate file names;
// we tried to make the file names unique on upload, but then
// the user may have edited them on the "add files" page, and
// renamed FOOBAR-1.txt back to FOOBAR.txt...
IngestUtil.checkForDuplicateFileNamesFinal(version, newFiles);
Dataset dataset = version.getDataset();
for (DataFile dataFile : newFiles) {
String tempFileLocation = FileUtil.getFilesTempDirectory() + "/" + dataFile.getStorageIdentifier();
// These are all brand new files, so they should all have
// one filemetadata total. -- L.A.
FileMetadata fileMetadata = dataFile.getFileMetadatas().get(0);
String fileName = fileMetadata.getLabel();
// Attach the file to the dataset and to this version, if that hasn't been done yet:
if (dataFile.getOwner() == null) {
dataFile.setOwner(dataset);
version.getFileMetadatas().add(dataFile.getFileMetadata());
dataFile.getFileMetadata().setDatasetVersion(version);
dataset.getFiles().add(dataFile);
}
boolean metadataExtracted = false;
if (FileUtil.ingestableAsTabular(dataFile)) {
/*
* Note that we don't try to ingest the file right away -
* instead we mark it as "scheduled for ingest", then at
* the end of the save process it will be queued for async.
* ingest in the background. In the meantime, the file
* will be ingested as a regular, non-tabular file, and
* appear as such to the user, until the ingest job is
* finished with the Ingest Service.
*/
dataFile.SetIngestScheduled();
} else if (fileMetadataExtractable(dataFile)) {
try {
// FITS is the only type supported for metadata
// extraction, as of now. -- L.A. 4.0
dataFile.setContentType("application/fits");
metadataExtracted = extractMetadata(tempFileLocation, dataFile, version);
} catch (IOException mex) {
logger.severe("Caught exception trying to extract indexable metadata from file " + fileName + ", " + mex.getMessage());
}
if (metadataExtracted) {
logger.fine("Successfully extracted indexable metadata from file " + fileName);
} else {
logger.fine("Failed to extract indexable metadata from file " + fileName);
}
}
// Try to save the file in its permanent location:
String storageId = dataFile.getStorageIdentifier().replaceFirst("^tmp://", "");
Path tempLocationPath = Paths.get(FileUtil.getFilesTempDirectory() + "/" + storageId);
WritableByteChannel writeChannel = null;
FileChannel readChannel = null;
boolean localFile = false;
boolean savedSuccess = false;
StorageIO<DataFile> dataAccess = null;
try {
logger.fine("Attempting to create a new storageIO object for " + storageId);
dataAccess = DataAccess.createNewStorageIO(dataFile, storageId);
if (dataAccess.isLocalFile()) {
localFile = true;
}
logger.fine("Successfully created a new storageIO object.");
/*
This commented-out code demonstrates how to copy bytes
from a local InputStream (or a readChannel) into the
writable byte channel of a Dataverse DataAccessIO object:
*/
/*
storageIO.open(DataAccessOption.WRITE_ACCESS);
writeChannel = storageIO.getWriteChannel();
readChannel = new FileInputStream(tempLocationPath.toFile()).getChannel();
long bytesPerIteration = 16 * 1024; // 16K bytes
long start = 0;
while ( start < readChannel.size() ) {
readChannel.transferTo(start, bytesPerIteration, writeChannel);
start += bytesPerIteration;
}
*/
/*
But it's easier to use this convenience method from the
DataAccessIO. If the underlying storage method for this file is the
local filesystem, the DataAccessIO will simply copy
the file using Files.copy, like this:
Files.copy(tempLocationPath, storageIO.getFileSystemLocation(), StandardCopyOption.REPLACE_EXISTING);
*/
dataAccess.savePath(tempLocationPath);
// Set filesize in bytes:
dataFile.setFilesize(dataAccess.getSize());
savedSuccess = true;
logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel());
} catch (IOException ioex) {
logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")");
} finally {
if (readChannel != null) {
try {
readChannel.close();
} catch (IOException e) {
// not fatal: failure to close the temporary read channel is ignored
}
}
if (writeChannel != null) {
try {
writeChannel.close();
} catch (IOException e) {
// not fatal: failure to close the write channel is ignored
}
}
}
// Since we may have already spent some CPU cycles scaling down image thumbnails,
// we may as well save them, by moving these generated images to the permanent
// dataset directory. We should also remember to delete any such files in the
// temp directory:
List<Path> generatedTempFiles = listGeneratedTempFiles(Paths.get(FileUtil.getFilesTempDirectory()), storageId);
if (generatedTempFiles != null) {
for (Path generated : generatedTempFiles) {
if (savedSuccess) {
// && localFile) {
logger.fine("(Will also try to permanently save generated thumbnail file " + generated.toString() + ")");
try {
// Files.copy(generated, Paths.get(dataset.getFileSystemDirectory().toString(), generated.getFileName().toString()));
int i = generated.toString().lastIndexOf("thumb");
if (i > 1) {
String extensionTag = generated.toString().substring(i);
dataAccess.savePathAsAux(generated, extensionTag);
logger.fine("Saved generated thumbnail as aux object. \"preview available\" status: " + dataFile.isPreviewImageAvailable());
} else {
logger.warning("Generated thumbnail file name does not match the expected pattern: " + generated.toString());
}
} catch (IOException ioex) {
logger.warning("Failed to save generated file " + generated.toString());
}
try {
Files.delete(generated);
} catch (IOException ioex) {
logger.warning("Failed to delete generated file " + generated.toString());
}
}
}
}
try {
logger.fine("Will attempt to delete the temp file " + tempLocationPath.toString());
Files.delete(tempLocationPath);
} catch (IOException ex) {
// (non-fatal - it's just a temp file.)
logger.warning("Failed to delete temp file " + tempLocationPath.toString());
}
// Any necessary post-processing:
// performPostProcessingTasks(dataFile);
}
logger.fine("Done! Finished saving new files in permanent storage.");
}
}
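As a minimal sketch of the calling order the comments above describe (store the physical files first, then persist the dataset in the database); the injected ingestService field, the buildDataFilesFromUpload() and persistDataset() helpers are assumptions for illustration, not Dataverse API.
// Hypothetical save flow around addFiles(); names marked below are assumptions.
List<DataFile> newFiles = buildDataFilesFromUpload(upload);   // hypothetical helper
DatasetVersion workingVersion = dataset.getEditVersion();     // assumes the standard edit-version accessor
ingestService.addFiles(workingVersion, newFiles);             // move files (and generated thumbnails) to permanent storage
persistDataset(dataset);                                      // hypothetical: issue the Save/Update command against the database
// Tabular files marked "ingest scheduled" above are then queued for async ingest.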
Use of edu.harvard.iq.dataverse.FileMetadata in project dataverse by IQSS.
The class IngestServiceBean, method extractMetadata.
/*
* extractMetadata:
* framework for extracting metadata from uploaded files. The results will
* be used to populate the metadata of the Dataset to which the file belongs.
*/
public boolean extractMetadata(String tempFileLocation, DataFile dataFile, DatasetVersion editVersion) throws IOException {
boolean ingestSuccessful = false;
FileInputStream tempFileInputStream = null;
try {
tempFileInputStream = new FileInputStream(new File(tempFileLocation));
} catch (FileNotFoundException notfoundEx) {
throw new IOException("Could not open temp file " + tempFileLocation);
}
// Locate metadata extraction plugin for the file format by looking
// it up with the Ingest Service Provider Registry:
// FileMetadataExtractor extractorPlugin = IngestSP.getMetadataExtractorByMIMEType(dfile.getContentType());
FileMetadataExtractor extractorPlugin = new FITSFileMetadataExtractor();
FileMetadataIngest extractedMetadata = extractorPlugin.ingest(new BufferedInputStream(tempFileInputStream));
Map<String, Set<String>> extractedMetadataMap = extractedMetadata.getMetadataMap();
// Store the fields and values we've gathered for safe-keeping:
// from 3.6:
// attempt to ingest the extracted metadata into the database;
// TODO: this should throw an exception if anything goes wrong.
FileMetadata fileMetadata = dataFile.getFileMetadata();
if (extractedMetadataMap != null) {
logger.fine("Ingest Service: Processing extracted metadata;");
if (extractedMetadata.getMetadataBlockName() != null) {
logger.fine("Ingest Service: This metadata belongs to the " + extractedMetadata.getMetadataBlockName() + " metadata block.");
processDatasetMetadata(extractedMetadata, editVersion);
}
processFileLevelMetadata(extractedMetadata, fileMetadata);
}
ingestSuccessful = true;
return ingestSuccessful;
}
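One thing worth noting: the method above never closes tempFileInputStream. A minimal sketch (an editorial suggestion, not the project's code) of the same open-and-ingest step using try-with-resources, so the stream is released even if the extractor throws:
FileMetadataIngest extractedMetadata;
try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(tempFileLocation))) {
    // FITS is still the only supported extractor here, as in the original method
    FileMetadataExtractor extractorPlugin = new FITSFileMetadataExtractor();
    extractedMetadata = extractorPlugin.ingest(in);
} catch (FileNotFoundException notfoundEx) {
    throw new IOException("Could not open temp file " + tempFileLocation);
}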
Use of edu.harvard.iq.dataverse.FileMetadata in project dataverse by IQSS.
The class IngestUtil, method existingPathNamesAsSet.
private static Set<String> existingPathNamesAsSet(DatasetVersion version, FileMetadata fileMetadata) {
Set<String> pathNamesExisting = new HashSet<>();
// Iterate over the files already in this version, skipping the (optional) fileMetadata
// passed in, so that a file is not compared against itself:
for (Iterator<FileMetadata> fmIt = version.getFileMetadatas().iterator(); fmIt.hasNext(); ) {
FileMetadata fm = fmIt.next();
if (fm.getId() != null && (fileMetadata == null || !fm.getId().equals(fileMetadata.getId()))) {
String existingName = fm.getLabel();
String existingDir = fm.getDirectoryLabel();
String existingPath = makePathName(existingDir, existingName);
if (!existingPath.isEmpty()) {
pathNamesExisting.add(existingPath);
// For tabular files, also add the path under the original (pre-ingest) file name, so the
// duplicate check can still match; e.g. a Stata file foobar.dta becomes foobar.tab once ingested!
if (fm.getDataFile().isTabularData()) {
String originalPath;
String originalMimeType = fm.getDataFile().getDataTable().getOriginalFileFormat();
if (originalMimeType != null) {
String origFileExtension = FileUtil.generateOriginalExtension(originalMimeType);
originalPath = existingPath.replaceAll(".tab$", origFileExtension);
} else {
originalPath = existingPath.replaceAll(".tab$", "");
}
pathNamesExisting.add(originalPath);
}
}
}
}
return pathNamesExisting;
}
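A hedged sketch of how such a set can be used inside IngestUtil to keep a newly added file's path unique; newFileMetadata and the findUniqueLabel() helper are hypothetical, standing in for whatever renaming strategy checkForDuplicateFileNamesFinal applies.
Set<String> existingPaths = existingPathNamesAsSet(version, null);
String proposedPath = makePathName(newFileMetadata.getDirectoryLabel(), newFileMetadata.getLabel());
if (existingPaths.contains(proposedPath)) {
    // hypothetical helper: e.g. turn "data/foobar.txt" into "data/foobar-1.txt"
    newFileMetadata.setLabel(findUniqueLabel(newFileMetadata.getLabel(), existingPaths));
}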
Use of edu.harvard.iq.dataverse.FileMetadata in project dataverse by IQSS.
The class IngestUtil, method getUnfValuesOfFiles.
public static List<String> getUnfValuesOfFiles(DatasetVersion version) {
List<String> unfValueList = new ArrayList<>();
if (version == null) {
return unfValueList;
}
Iterator<FileMetadata> itfm = version.getFileMetadatas().iterator();
while (itfm.hasNext()) {
FileMetadata fileMetadata = itfm.next();
if (fileMetadata != null && fileMetadata.getDataFile() != null && fileMetadata.getDataFile().isTabularData() && fileMetadata.getDataFile().getUnf() != null) {
String varunf = fileMetadata.getDataFile().getUnf();
unfValueList.add(varunf);
}
}
return unfValueList;
}
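A minimal usage sketch mirroring how this list can drive a version-level decision: a DatasetVersion only needs its own UNF when at least one of its files is tabular and carries a file-level UNF. The version.setUNF() accessor and the logger are assumed here.
List<String> unfValues = IngestUtil.getUnfValuesOfFiles(version);
if (unfValues.isEmpty()) {
    // no file-level UNFs to aggregate; clear any stale version-level UNF
    version.setUNF(null);
} else {
    // otherwise a version-level UNF would be recalculated from unfValues
    logger.fine("Version has " + unfValues.size() + " file-level UNF value(s).");
}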