Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS — class DatasetUtilTest, method testGetThumbnailCandidates:
/**
 * Test of getThumbnailCandidates method, of class DatasetUtil.
 *
 * Verifies that no thumbnail candidates are returned for a null dataset,
 * and that a dataset whose only file has no usable cached thumbnail also
 * yields an empty candidate list.
 */
@Test
public void testGetThumbnailCandidates() {
    // A null dataset must yield an empty candidate list, not an NPE.
    assertEquals(new ArrayList<>(), DatasetUtil.getThumbnailCandidates(null, false));
    Dataset dataset = MocksFactory.makeDataset();
    DataFile dataFile = MocksFactory.makeDataFile();
    dataFile.setContentType("image/");
    dataFile.setOwner(dataset);
    dataFile.setStorageIdentifier("file://src/test/resources/images/coffeeshop.png");
    DatasetVersion version = dataset.getCreateVersion();
    List<FileMetadata> fmds = new ArrayList<>();
    fmds.add(MocksFactory.addFileMetadata(dataFile));
    version.setFileMetadatas(fmds);
    // No thumbnail has been generated/cached for the mock file, so the
    // dataset still has no candidates.
    assertEquals(new ArrayList<>(), DatasetUtil.getThumbnailCandidates(dataset, false));
}
Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS — class DataConverter, method runFormatConversion:
// Method for (subsettable) file format conversion.
// The method needs the subsettable file saved on disk in the
// TAB-delimited format.
// Meaning, if this is a remote subsettable file, it needs to be downloaded
// and stored locally as a temporary file; and if it's a fixed-field file, it
// needs to be converted to TAB-delimited, before you can feed the file
// to this method. (See performFormatConversion() method.)
// The method below takes the tab file and sends it to the R server
// (possibly running on a remote host) and gets back the transformed copy,
// providing error-checking and diagnostics in the process.
// This is mostly Akio Sone's code from DVN3.
// (Hence some obsolete elements in the comment above: ALL of the tabular
// data files in Dataverse are saved in tab-delimited format - we no longer
// support fixed-field files!)
private static File runFormatConversion(DataFile file, File tabFile, String formatRequested) {
    // Null-safe comparison: a null formatRequested falls through to the
    // "unsupported format" branch below instead of throwing an NPE here.
    if (FILE_TYPE_TAB.equals(formatRequested)) {
        // The file is already in the tab-delimited format - nothing to do.
        return tabFile;
    }
    File formatConvertedFile = null;
    // create the service instance
    RemoteDataFrameService dfs = new RemoteDataFrameService();
    if (FILE_TYPE_RDATA.equals(formatRequested)) {
        List<DataVariable> dataVariables = file.getDataTable().getDataVariables();
        Map<String, Map<String, String>> vls = getValueTableForRequestedVariables(dataVariables);
        logger.fine("format conversion: variables(dataVariables)=" + dataVariables + "\n");
        logger.fine("format conversion: value table(vls)=" + vls + "\n");
        RJobRequest sro = new RJobRequest(dataVariables, vls);
        sro.setTabularDataFileName(tabFile.getAbsolutePath());
        sro.setRequestType(SERVICE_REQUEST_CONVERT);
        sro.setFormatRequested(FILE_TYPE_RDATA);
        // execute the service
        Map<String, String> resultInfo = dfs.execute(sro);
        logger.fine("resultInfo=" + resultInfo + "\n");
        if ("true".equals(resultInfo.get("RexecError"))) {
            logger.fine("R-runtime error trying to convert a file.");
            return null;
        }
        String dataFrameFileName = resultInfo.get("dataFrameFileName");
        logger.fine("data frame file name: " + dataFrameFileName);
        formatConvertedFile = new File(dataFrameFileName);
    } else if ("prep".equals(formatRequested)) {
        formatConvertedFile = dfs.runDataPreprocessing(file);
    } else {
        logger.warning("Unsupported file format requested: " + formatRequested);
        return null;
    }
    if (formatConvertedFile == null || !formatConvertedFile.exists()) {
        logger.warning("Format-converted file was not properly created.");
        return null;
    }
    logger.fine("frmtCnvrtdFile:length=" + formatConvertedFile.length());
    return formatConvertedFile;
}
Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS — class DataConverter, method performFormatConversion:
/**
 * Returns a StorageIO wrapper around the requested derivative format of a
 * tabular data file. A previously cached converted copy is used if one
 * exists; otherwise the conversion is run (via runFormatConversion) and the
 * result is cached as an auxiliary object for future requests.
 *
 * @param file            the tabular DataFile being converted
 * @param storageIO       storage access for the file's stored (tab-delimited) copy
 * @param formatRequested requested format tag (e.g. FILE_TYPE_TAB, "RData", "prep")
 * @param formatType      MIME type to set on the returned stream
 * @return a StorageIO for the converted stream, the original storageIO if
 *         no conversion is needed, or null on any failure
 */
public static StorageIO<DataFile> performFormatConversion(DataFile file, StorageIO<DataFile> storageIO, String formatRequested, String formatType) {
    if (!file.isTabularData()) {
        return null;
    }
    // Already stored as tab-delimited and that's what was asked for -
    // we don't need to do anything:
    if (formatRequested.equals(FILE_TYPE_TAB) && file.getContentType().equals("text/tab-separated-values")) {
        return storageIO;
    }
    InputStream convertedFileStream = null;
    long convertedFileSize = 0;
    // Check for a cached copy of the requested format:
    try {
        convertedFileStream = Channels.newInputStream((ReadableByteChannel) storageIO.openAuxChannel(formatRequested));
        convertedFileSize = storageIO.getAuxObjectSize(formatRequested);
    } catch (IOException ioex) {
        logger.fine("No cached copy for file format " + formatRequested + ", file " + file.getStorageIdentifier());
        convertedFileStream = null;
    }
    // If not cached, run the conversion:
    if (convertedFileStream == null) {
        File tabFile = null;
        boolean tempFilesRequired = false;
        try {
            Path tabFilePath = storageIO.getFileSystemPath();
            tabFile = tabFilePath.toFile();
        } catch (UnsupportedDataAccessOperationException uoex) {
            // this means there is no direct filesystem path for this object; it's ok!
            logger.fine("Could not open source file as a local Path - will go the temp file route.");
            tempFilesRequired = true;
        } catch (IOException ioex) {
            // this is likely a fatal condition, as in, the file is unaccessible:
            return null;
        }
        if (tempFilesRequired) {
            try {
                logger.fine("opening datafFileIO for the source tabular file...");
                storageIO.open();
                ReadableByteChannel tabFileChannel = storageIO.getReadChannel();
                tabFile = File.createTempFile("tempTabFile", ".tmp");
                // try-with-resources: the original code never closed this
                // output channel, leaking a file descriptor per conversion.
                try (FileOutputStream tempFileStream = new FileOutputStream(tabFile)) {
                    tempFileStream.getChannel().transferFrom(tabFileChannel, 0, storageIO.getSize());
                }
            } catch (IOException ioex) {
                logger.warning("caught IOException trying to store tabular file " + storageIO.getDataFile().getStorageIdentifier() + " as a temp file.");
                return null;
            }
        }
        if (tabFile == null) {
            return null;
        }
        if (tabFile.length() > 0) {
            File formatConvertedFile = runFormatConversion(file, tabFile, formatRequested);
            // cache the result for future use:
            if (formatConvertedFile != null && formatConvertedFile.exists()) {
                try {
                    storageIO.savePathAsAux(Paths.get(formatConvertedFile.getAbsolutePath()), formatRequested);
                } catch (IOException ex) {
                    logger.warning("failed to save cached format " + formatRequested + " for " + file.getStorageIdentifier());
                    // We'll assume that this is a non-fatal condition.
                }
                // re-open the generated file:
                try {
                    convertedFileStream = new FileInputStream(formatConvertedFile);
                    convertedFileSize = formatConvertedFile.length();
                } catch (FileNotFoundException fnfex) {
                    logger.warning("Failed to open generated format " + formatRequested + " for " + file.getStorageIdentifier());
                    return null;
                }
            }
        }
    }
    // Wrap the converted stream for the download API instance writer:
    if (convertedFileStream != null && convertedFileSize > 0) {
        InputStreamIO inputStreamIO;
        try {
            inputStreamIO = new InputStreamIO(convertedFileStream, convertedFileSize);
        } catch (IOException ioex) {
            // We can no longer hand the stream to a caller - close it so it
            // isn't leaked (the original code dropped it here).
            try {
                convertedFileStream.close();
            } catch (IOException ignored) {
                // best effort; nothing more we can do on this failure path
            }
            return null;
        }
        inputStreamIO.setMimeType(formatType);
        String fileName = storageIO.getFileName();
        if (fileName == null || fileName.isEmpty()) {
            fileName = "f" + file.getId().toString();
        }
        inputStreamIO.setFileName(generateAltFileName(formatRequested, fileName));
        return inputStreamIO;
    }
    return null;
}
Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS — class FileAccessIO, method getDatasetDirectory:
/**
 * Resolves the filesystem directory of the dataset that owns this DvObject.
 * For a Dataset the object's own directory is used; for a DataFile, the
 * owning dataset's directory. Dataverses have no filesystem directory.
 *
 * @return the dataset directory path as a String
 * @throws IOException if no DvObject is set, the DvObject is a Dataverse,
 *         the directory cannot be determined, or the DvObject has no
 *         storage identifier
 */
private String getDatasetDirectory() throws IOException {
    if (dvObject == null) {
        throw new IOException("No DvObject defined in the Data Access Object");
    }
    Path parentDirectory = null;
    if (dvObject instanceof Dataset) {
        parentDirectory = this.getDataset().getFileSystemDirectory();
    } else if (dvObject instanceof DataFile) {
        // A file lives in its owning dataset's directory.
        parentDirectory = this.getDataFile().getOwner().getFileSystemDirectory();
    } else if (dvObject instanceof Dataverse) {
        throw new IOException("FileAccessIO: Dataverses are not a supported dvObject");
    }
    if (parentDirectory == null) {
        throw new IOException("Could not determine the filesystem directory of the parent dataset.");
    }
    String storageId = dvObject.getStorageIdentifier();
    if (storageId == null || storageId.isEmpty()) {
        throw new IOException("Data Access: No local storage identifier defined for this datafile.");
    }
    return parentDirectory.toString();
}
Use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS — class S3AccessIO, method open:
@Override
public void open(DataAccessOption... options) throws IOException {
    // Fail fast if the S3 client or target bucket is not available.
    if (s3 == null) {
        throw new IOException("ERROR: s3 not initialised. ");
    }
    if (bucketName == null || !s3.doesBucketExist(bucketName)) {
        throw new IOException("ERROR: S3AccessIO - You must create and configure a bucket before creating datasets.");
    }
    DataAccessRequest req = this.getRequest();
    // The requested options decide whether this access object is being
    // opened for reading or for writing (mutually exclusive flags).
    if (isWriteAccessRequested(options)) {
        isWriteAccess = true;
        isReadAccess = false;
    } else {
        isWriteAccess = false;
        isReadAccess = true;
    }
    if (dvObject instanceof DataFile) {
        String storageIdentifier = dvObject.getStorageIdentifier();
        DataFile dataFile = this.getDataFile();
        // "noVarHeader" request parameter suppresses the generated variable
        // header line for tabular files (see the read branch below).
        if (req != null && req.getParameter("noVarHeader") != null) {
            this.setNoVarHeader(true);
        }
        if (storageIdentifier == null || "".equals(storageIdentifier)) {
            throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile.");
        }
        if (isReadAccess) {
            // Read path: fetch the S3 object and expose its content as this
            // access object's input stream / channel, recording its size.
            key = getMainFileKey();
            S3Object s3object = s3.getObject(new GetObjectRequest(bucketName, key));
            InputStream in = s3object.getObjectContent();
            if (in == null) {
                throw new IOException("Cannot get Object" + key);
            }
            this.setInputStream(in);
            setChannel(Channels.newChannel(in));
            this.setSize(s3object.getObjectMetadata().getContentLength());
            // For ingested tabular files, pre-generate the variable-name
            // header line (unless suppressed via noVarHeader above).
            if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) {
                List<DataVariable> datavariables = dataFile.getDataTable().getDataVariables();
                String varHeaderLine = generateVariableHeader(datavariables);
                this.setVarHeader(varHeaderLine);
            }
        } else if (isWriteAccess) {
            // Write path: build the object key as <authority>/<datasetId>/<fileId>.
            key = dataFile.getOwner().getAuthority() + "/" + this.getDataFile().getOwner().getIdentifier();
            if (storageIdentifier.startsWith(S3_IDENTIFIER_PREFIX + "://")) {
                // Identifier already carries the S3 prefix - reuse the part
                // after the last ':' as the per-file key component.
                key += "/" + storageIdentifier.substring(storageIdentifier.lastIndexOf(":") + 1);
            } else {
                // First write: adopt the raw identifier and rewrite it into
                // the canonical "s3://<bucket>:<id>" form on the DvObject.
                key += "/" + storageIdentifier;
                dvObject.setStorageIdentifier(S3_IDENTIFIER_PREFIX + "://" + bucketName + ":" + storageIdentifier);
            }
        }
        this.setMimeType(dataFile.getContentType());
        try {
            this.setFileName(dataFile.getFileMetadata().getLabel());
        } catch (Exception ex) {
            // NOTE(review): broad catch - presumably guarding against a
            // missing FileMetadata; falls back to a placeholder name.
            this.setFileName("unknown");
        }
    } else if (dvObject instanceof Dataset) {
        // Datasets get an <authority>/<identifier> key and a canonical
        // storage identifier, but no stream is opened.
        Dataset dataset = this.getDataset();
        key = dataset.getAuthority() + "/" + dataset.getIdentifier();
        dataset.setStorageIdentifier(S3_IDENTIFIER_PREFIX + "://" + key);
    } else if (dvObject instanceof Dataverse) {
        throw new IOException("Data Access: Invalid DvObject type : Dataverse");
    } else {
        throw new IOException("Data Access: Invalid DvObject type");
    }
}
Aggregations