Use of edu.harvard.iq.dataverse.datavariable.DataVariable in the IQSS/dataverse project: class RJobRequest, method getVariableIds().
/**
 * Builds the list of variable identifiers for this R job request.
 *
 * @return an array of ids in the form "v&lt;databaseId&gt;" (e.g. "v123"),
 *         one per variable in {@code dataVariablesForRequest}; empty array
 *         if the request contains no variables.
 */
public String[] getVariableIds() {
    List<String> ids = new ArrayList<>();
    for (DataVariable dv : dataVariablesForRequest) {
        // String concatenation invokes toString() implicitly; the explicit
        // call in the previous version was redundant.
        ids.add("v" + dv.getId());
    }
    // Passing a zero-length array is the idiomatic (and JIT-friendly) form.
    return ids.toArray(new String[0]);
}
Use of edu.harvard.iq.dataverse.datavariable.DataVariable in the IQSS/dataverse project: class DownloadInstanceWriter, method writeTo().
@Override
// JAX-RS MessageBodyWriter entry point: streams the requested DataFile (or a
// derived representation of it - thumbnail, format conversion, column subset)
// into the HTTP response body. Also sets Content-disposition/Content-Type/
// Content-Length headers, optionally records a guestbook response, and may
// short-circuit with a RedirectionException to send the client straight to S3.
// Throws 404 if no usable stream could be produced, 503 if a requested
// conversion failed.
public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[] annotation, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream outstream) throws IOException, WebApplicationException {
    if (di.getDownloadInfo() != null && di.getDownloadInfo().getDataFile() != null) {
        DataAccessRequest daReq = new DataAccessRequest();
        DataFile dataFile = di.getDownloadInfo().getDataFile();
        // Resolve the storage driver (local filesystem, S3, ...) for this file.
        StorageIO<DataFile> storageIO = DataAccess.getStorageIO(dataFile, daReq);
        if (storageIO != null) {
            storageIO.open();
            if (di.getConversionParam() != null) {
                // --- A derived representation was requested (thumb / format / subset) ---
                if (di.getConversionParam().equals("imageThumb") && !dataFile.isHarvested()) {
                    if ("".equals(di.getConversionParamValue())) {
                        // No explicit size supplied: use the default thumbnail size.
                        storageIO = ImageThumbConverter.getImageThumbnailAsInputStream(storageIO, ImageThumbConverter.DEFAULT_THUMBNAIL_SIZE);
                    } else {
                        try {
                            // NOTE(review): new Integer(...) is deprecated;
                            // Integer.parseInt would be preferable here.
                            int size = new Integer(di.getConversionParamValue());
                            if (size > 0) {
                                storageIO = ImageThumbConverter.getImageThumbnailAsInputStream(storageIO, size);
                            }
                        } catch (java.lang.NumberFormatException ex) {
                            // Unparseable size parameter: fall back to the default size.
                            storageIO = ImageThumbConverter.getImageThumbnailAsInputStream(storageIO, ImageThumbConverter.DEFAULT_THUMBNAIL_SIZE);
                        }
                        // and, since we now have tabular data files that can
                        // have thumbnail previews... obviously, we don't want to
                        // add the variable header to the image stream!
                        storageIO.setNoVarHeader(Boolean.TRUE);
                        storageIO.setVarHeader(null);
                    }
                } else if (dataFile.isTabularData()) {
                    logger.fine("request for tabular data download;");
                    if (di.getConversionParam().equals("noVarHeader")) {
                        // Raw tab-delimited data, without the variable-name header line.
                        logger.fine("tabular data with no var header requested");
                        storageIO.setNoVarHeader(Boolean.TRUE);
                        storageIO.setVarHeader(null);
                    } else if (di.getConversionParam().equals("format")) {
                        if ("original".equals(di.getConversionParamValue())) {
                            // The pre-ingest original file, as uploaded.
                            // (note: "retreive" is the method's actual spelling)
                            logger.fine("stored original of an ingested file requested");
                            storageIO = StoredOriginalFile.retreive(storageIO);
                        } else {
                            // Other format conversions:
                            logger.fine("format conversion on a tabular file requested (" + di.getConversionParamValue() + ")");
                            String requestedMimeType = di.getServiceFormatType(di.getConversionParam(), di.getConversionParamValue());
                            if (requestedMimeType == null) {
                                // default mime type, in case real type is unknown;
                                // (this shouldn't happen in real life - but just in case):
                                requestedMimeType = "application/octet-stream";
                            }
                            storageIO = DataConverter.performFormatConversion(dataFile, storageIO, di.getConversionParamValue(), requestedMimeType);
                        }
                    } else if (di.getConversionParam().equals("subset")) {
                        // Column-wise subset: extra arguments hold the DataVariables
                        // selected by the caller (populated in Access.datafile()).
                        logger.fine("processing subset request.");
                        if (di.getExtraArguments() != null && di.getExtraArguments().size() > 0) {
                            logger.fine("processing extra arguments list of length " + di.getExtraArguments().size());
                            List<Integer> variablePositionIndex = new ArrayList<>();
                            String subsetVariableHeader = null;
                            for (int i = 0; i < di.getExtraArguments().size(); i++) {
                                DataVariable variable = (DataVariable) di.getExtraArguments().get(i);
                                if (variable != null) {
                                    // Only accept variables that actually belong to the
                                    // file being downloaded.
                                    if (variable.getDataTable().getDataFile().getId().equals(dataFile.getId())) {
                                        logger.fine("adding variable id " + variable.getId() + " to the list.");
                                        variablePositionIndex.add(variable.getFileOrder());
                                        // Build the tab-separated header of selected
                                        // variable names as we go.
                                        if (subsetVariableHeader == null) {
                                            subsetVariableHeader = variable.getName();
                                        } else {
                                            subsetVariableHeader = subsetVariableHeader.concat("\t");
                                            subsetVariableHeader = subsetVariableHeader.concat(variable.getName());
                                        }
                                    } else {
                                        logger.warning("variable does not belong to this data file.");
                                    }
                                }
                            }
                            if (variablePositionIndex.size() > 0) {
                                try {
                                    // Materialize the subset into a temp file, then wrap
                                    // it in an InputStreamIO to replace storageIO.
                                    File tempSubsetFile = File.createTempFile("tempSubsetFile", ".tmp");
                                    TabularSubsetGenerator tabularSubsetGenerator = new TabularSubsetGenerator();
                                    tabularSubsetGenerator.subsetFile(storageIO.getInputStream(), tempSubsetFile.getAbsolutePath(), variablePositionIndex, dataFile.getDataTable().getCaseQuantity(), "\t");
                                    if (tempSubsetFile.exists()) {
                                        FileInputStream subsetStream = new FileInputStream(tempSubsetFile);
                                        long subsetSize = tempSubsetFile.length();
                                        InputStreamIO subsetStreamIO = new InputStreamIO(subsetStream, subsetSize);
                                        logger.fine("successfully created subset output stream.");
                                        subsetVariableHeader = subsetVariableHeader.concat("\n");
                                        subsetStreamIO.setVarHeader(subsetVariableHeader);
                                        // Derive a "-subset.tab" download name from the
                                        // original file name, if there is one.
                                        String tabularFileName = storageIO.getFileName();
                                        if (tabularFileName != null && tabularFileName.endsWith(".tab")) {
                                            tabularFileName = tabularFileName.replaceAll("\\.tab$", "-subset.tab");
                                        } else if (tabularFileName != null && !"".equals(tabularFileName)) {
                                            tabularFileName = tabularFileName.concat("-subset.tab");
                                        } else {
                                            tabularFileName = "subset.tab";
                                        }
                                        subsetStreamIO.setFileName(tabularFileName);
                                        subsetStreamIO.setMimeType(storageIO.getMimeType());
                                        storageIO = subsetStreamIO;
                                    } else {
                                        // Subset generation silently produced nothing;
                                        // null storageIO triggers a 503 below.
                                        storageIO = null;
                                    }
                                } catch (IOException ioex) {
                                    storageIO = null;
                                }
                            }
                        } else {
                            logger.fine("empty list of extra arguments.");
                        }
                    }
                }
                if (storageIO == null) {
                    // A conversion was requested but could not be performed.
                    throw new WebApplicationException(Response.Status.SERVICE_UNAVAILABLE);
                }
            } else {
                // --- Plain download, no conversion requested ---
                if (storageIO instanceof S3AccessIO && !(dataFile.isTabularData()) && isRedirectToS3()) {
                    // [attempt to] redirect:
                    String redirect_url_str = ((S3AccessIO) storageIO).generateTemporaryS3Url();
                    // better exception handling here?
                    logger.info("Data Access API: direct S3 url: " + redirect_url_str);
                    URI redirect_uri;
                    try {
                        redirect_uri = new URI(redirect_url_str);
                    } catch (URISyntaxException ex) {
                        logger.info("Data Access API: failed to create S3 redirect url (" + redirect_url_str + ")");
                        redirect_uri = null;
                    }
                    if (redirect_uri != null) {
                        // definitely close the (still open) S3 input stream,
                        // since we are not going to use it. The S3 documentation
                        // emphasizes that it is very important not to leave these
                        // lying around un-closed, since they are going to fill
                        // up the S3 connection pool!
                        storageIO.getInputStream().close();
                        // increment the download count, if necessary:
                        if (di.getGbr() != null) {
                            try {
                                logger.fine("writing guestbook response, for an S3 download redirect.");
                                Command<?> cmd = new CreateGuestbookResponseCommand(di.getDataverseRequestService().getDataverseRequest(), di.getGbr(), di.getGbr().getDataFile().getOwner());
                                di.getCommand().submit(cmd);
                            } catch (CommandException e) {
                                // NOTE(review): guestbook failures are deliberately
                                // non-fatal - the download proceeds regardless.
                            }
                        }
                        // finally, issue the redirect:
                        Response response = Response.seeOther(redirect_uri).build();
                        logger.info("Issuing redirect to the file location on S3.");
                        // JAX-RS turns this into a 303 See Other response.
                        throw new RedirectionException(response);
                    }
                    // If the redirect URI could not be built, fall through and
                    // stream the bytes through this server instead.
                }
            }
            InputStream instream = storageIO.getInputStream();
            if (instream != null) {
                // headers:
                String fileName = storageIO.getFileName();
                String mimeType = storageIO.getMimeType();
                // Provide both the "Content-disposition" and "Content-Type" headers,
                // to satisfy the widest selection of browsers out there.
                httpHeaders.add("Content-disposition", "attachment; filename=\"" + fileName + "\"");
                httpHeaders.add("Content-Type", mimeType + "; name=\"" + fileName + "\"");
                long contentSize;
                // NOTE: chunked transfer is currently disabled (see commented-out
                // code below); useChunkedTransfer therefore stays false and the
                // chunk-framing branches below are dead until re-enabled.
                boolean useChunkedTransfer = false;
                // if ((contentSize = getFileSize(di, storageIO.getVarHeader())) > 0) {
                if ((contentSize = getContentSize(storageIO)) > 0) {
                    logger.fine("Content size (retrieved from the AccessObject): " + contentSize);
                    httpHeaders.add("Content-Length", contentSize);
                } else {
                    // httpHeaders.add("Transfer-encoding", "chunked");
                    // useChunkedTransfer = true;
                }
                // (the httpHeaders map must be modified *before* writing any
                // data in the output stream!)
                int bufsize;
                byte[] bffr = new byte[4 * 8192];
                byte[] chunkClose = "\r\n".getBytes();
                // If a variable-name header line is attached (tabular files),
                // emit it before the data body.
                if (storageIO.getVarHeader() != null) {
                    if (storageIO.getVarHeader().getBytes().length > 0) {
                        if (useChunkedTransfer) {
                            String chunkSizeLine = String.format("%x\r\n", storageIO.getVarHeader().getBytes().length);
                            outstream.write(chunkSizeLine.getBytes());
                        }
                        outstream.write(storageIO.getVarHeader().getBytes());
                        if (useChunkedTransfer) {
                            outstream.write(chunkClose);
                        }
                    }
                }
                // Copy the data stream to the response in 32KB chunks.
                while ((bufsize = instream.read(bffr)) != -1) {
                    if (useChunkedTransfer) {
                        String chunkSizeLine = String.format("%x\r\n", bufsize);
                        outstream.write(chunkSizeLine.getBytes());
                    }
                    outstream.write(bffr, 0, bufsize);
                    if (useChunkedTransfer) {
                        outstream.write(chunkClose);
                    }
                }
                if (useChunkedTransfer) {
                    String chunkClosing = "0\r\n\r\n";
                    outstream.write(chunkClosing.getBytes());
                }
                logger.fine("di conversion param: " + di.getConversionParam() + ", value: " + di.getConversionParamValue());
                // Record the guestbook response (download count), except for
                // thumbnail and preprocessed-metadata requests.
                if (di.getGbr() != null && !(isThumbnailDownload(di) || isPreprocessedMetadataDownload(di))) {
                    try {
                        logger.fine("writing guestbook response.");
                        Command<?> cmd = new CreateGuestbookResponseCommand(di.getDataverseRequestService().getDataverseRequest(), di.getGbr(), di.getGbr().getDataFile().getOwner());
                        di.getCommand().submit(cmd);
                    } catch (CommandException e) {
                        // NOTE(review): intentionally swallowed - guestbook
                        // bookkeeping must not fail an otherwise good download.
                    }
                } else {
                    logger.fine("not writing guestbook response");
                }
                instream.close();
                outstream.close();
                return;
            }
        }
    }
    // No DownloadInfo/DataFile, no storage driver, or no readable stream.
    throw new WebApplicationException(Response.Status.NOT_FOUND);
}
Use of edu.harvard.iq.dataverse.datavariable.DataVariable in the IQSS/dataverse project: class Access, method datafile().
@Path("datafile/{fileId}")
@GET
// @Produces({ "application/xml" })
/**
 * Data Access API endpoint: prepares a {@link DownloadInstance} describing a
 * single DataFile download, including any optional services (thumbnail,
 * original format, R format, preprocessed metadata, column subsetting)
 * selected via query parameters.
 *
 * @param fileId   database id of the DataFile to download
 * @param gbrecs   if absent (null) and the file is released, a guestbook
 *                 response will be initialized for the download
 * @param apiToken optional API key (falls back to the API key request header)
 * @param uriInfo  injected; query parameters select the download service
 * @param headers  injected request headers
 * @param response injected; used to set the CORS header
 * @return the configured DownloadInstance (serialized by DownloadInstanceWriter)
 * @throws NotFoundException  if the file does not exist or is harvested
 *                            (this API must never serve harvested files)
 */
public DownloadInstance datafile(@PathParam("fileId") Long fileId, @QueryParam("gbrecs") Boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) {
    DataFile df = dataFileService.find(fileId);
    GuestbookResponse gbr = null;
    if (df == null) {
        logger.warning("Access: datafile service could not locate a DataFile object for id " + fileId + "!");
        throw new NotFoundException();
    }
    if (df.isHarvested()) {
        throw new NotFoundException();
        // (nobody should ever be using this API on a harvested DataFile)!
    }
    if (apiToken == null || apiToken.isEmpty()) {
        apiToken = headers.getHeaderString(API_KEY_HEADER);
    }
    if (gbrecs == null && df.isReleased()) {
        // Write Guestbook record if not done previously and file is released
        User apiTokenUser = findAPITokenUser(apiToken);
        gbr = guestbookResponseService.initAPIGuestbookResponse(df.getOwner(), df, session, apiTokenUser);
    }
    // This will throw a ForbiddenException if access isn't authorized:
    checkAuthorization(df, apiToken);
    DownloadInfo dInfo = new DownloadInfo(df);
    logger.fine("checking if thumbnails are supported on this file.");
    if (FileUtil.isThumbnailSupported(df)) {
        dInfo.addServiceAvailable(new OptionalAccessService("thumbnail", "image/png", "imageThumb=true", "Image Thumbnail (64x64)"));
    }
    if (df.isTabularData()) {
        // Tabular (ingested) files offer extra representations:
        String originalMimeType = df.getDataTable().getOriginalFileFormat();
        dInfo.addServiceAvailable(new OptionalAccessService("original", originalMimeType, "format=original", "Saved original (" + originalMimeType + ")"));
        dInfo.addServiceAvailable(new OptionalAccessService("R", "application/x-rlang-transport", "format=RData", "Data in R format"));
        dInfo.addServiceAvailable(new OptionalAccessService("preprocessed", "application/json", "format=prep", "Preprocessed data in JSON"));
        dInfo.addServiceAvailable(new OptionalAccessService("subset", "text/tab-separated-values", "variables=<LIST>", "Column-wise Subsetting"));
    }
    DownloadInstance downloadInstance = new DownloadInstance(dInfo);
    if (gbr != null) {
        downloadInstance.setGbr(gbr);
        downloadInstance.setDataverseRequestService(dvRequestService);
        downloadInstance.setCommand(engineSvc);
    }
    // Find the first query parameter that names a supported download service;
    // DownloadInstance.isDownloadServiceSupported() records it as the
    // conversion param/value on success.
    for (String key : uriInfo.getQueryParameters().keySet()) {
        String value = uriInfo.getQueryParameters().getFirst(key);
        if (downloadInstance.isDownloadServiceSupported(key, value)) {
            logger.fine("is download service supported? key=" + key + ", value=" + value);
            if (downloadInstance.getConversionParam().equals("subset")) {
                // The subset value is a comma-separated list of "v<id>" tokens;
                // resolve each to a DataVariable and stash it in extraArguments
                // for DownloadInstanceWriter to use.
                String subsetParam = downloadInstance.getConversionParamValue();
                // String.split() never returns null, so only the length matters.
                String[] variableIdParams = subsetParam.split(",");
                if (variableIdParams.length > 0) {
                    logger.fine(variableIdParams.length + " tokens;");
                    for (int i = 0; i < variableIdParams.length; i++) {
                        logger.fine("token: " + variableIdParams[i]);
                        String token = variableIdParams[i].replaceFirst("^v", "");
                        Long variableId = null;
                        try {
                            // Long.valueOf replaces the deprecated new Long(String).
                            variableId = Long.valueOf(token);
                        } catch (NumberFormatException nfe) {
                            // Malformed token: skip it silently (matches prior behavior).
                            variableId = null;
                        }
                        if (variableId != null) {
                            logger.fine("attempting to look up variable id " + variableId);
                            if (variableService != null) {
                                DataVariable variable = variableService.find(variableId);
                                if (variable != null) {
                                    if (downloadInstance.getExtraArguments() == null) {
                                        downloadInstance.setExtraArguments(new ArrayList<Object>());
                                    }
                                    logger.fine("putting variable id " + variable.getId() + " on the parameters list of the download instance.");
                                    downloadInstance.getExtraArguments().add(variable);
                                    // if (!variable.getDataTable().getDataFile().getId().equals(sf.getId())) {
                                    // variableList.add(variable);
                                    // }
                                }
                            } else {
                                logger.fine("variable service is null.");
                            }
                        }
                    }
                }
            }
            logger.fine("downloadInstance: " + downloadInstance.getConversionParam() + "," + downloadInstance.getConversionParamValue());
            break;
        } else {
            // Service unknown/not supported/bad arguments, etc.:
            // TODO: throw new ServiceUnavailableException();
        }
    }
    /*
     * Provide "Access-Control-Allow-Origin" header:
     */
    response.setHeader("Access-Control-Allow-Origin", "*");
    // return retValue;
    return downloadInstance;
}
Use of edu.harvard.iq.dataverse.datavariable.DataVariable in the IQSS/dataverse project: class StorageIO, method generateVariableHeader().
/**
 * Builds the tab-separated header line of variable names for a tabular file.
 *
 * @param dvs the variables, in column order; may be null
 * @return the variable names joined by tabs and terminated with a newline,
 *         or null when {@code dvs} is null or empty. (The previous version
 *         returned the literal string "null\n" for an empty, non-null list,
 *         because the null accumulator was concatenated with "\n".)
 */
public String generateVariableHeader(List<DataVariable> dvs) {
    if (dvs == null || dvs.isEmpty()) {
        return null;
    }
    // StringBuilder avoids repeated String concatenation in the loop.
    StringBuilder varHeader = new StringBuilder(dvs.get(0).getName());
    for (int i = 1; i < dvs.size(); i++) {
        varHeader.append('\t').append(dvs.get(i).getName());
    }
    return varHeader.append('\n').toString();
}
Use of edu.harvard.iq.dataverse.datavariable.DataVariable in the IQSS/dataverse project: class DdiExportUtil, method createDataDscr().
// Methods specific to the tabular data ("<dataDscr>") section.
// Note that these do NOT operate on DTO objects, but instead directly
// on Dataverse DataVariable, DataTable, etc. objects.
// This is because for this release (4.5) we are recycling the already available
// code, and this is what we got. (We already have DTO objects for DataTable,
// and DataVariable, etc., but the current version JsonPrinter.jsonAsDatasetDto()
// does not produce JSON for these objects - it stops at DataFile. Eventually
// we want all of our objects to be exportable as JSON, and then all the exports
// can go through the same DTO state... But we don't have time for it now;
// plus, the structure of file-level metadata is currently being re-designed,
// so we probably should not invest any time into it right now). -- L.A. 4.5
/**
 * Writes the DDI {@code <dataDscr>} section for a dataset version: one
 * {@code <var>} element (via {@link #createVarDDI}) per variable of every
 * tabular data file. The {@code <dataDscr>} element is opened lazily, only
 * when the first tabular file is encountered, and is omitted entirely when
 * the version has no tabular files.
 *
 * @param xmlw           the writer to emit XML into
 * @param datasetVersion the version whose file metadata is exported
 * @throws XMLStreamException if writing to {@code xmlw} fails
 */
private static void createDataDscr(XMLStreamWriter xmlw, DatasetVersion datasetVersion) throws XMLStreamException {
    if (datasetVersion.getFileMetadatas() == null || datasetVersion.getFileMetadatas().isEmpty()) {
        return;
    }
    boolean dataDscrOpened = false;
    for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) {
        DataFile dataFile = fileMetadata.getDataFile();
        // Only ingested (tabular) files contribute variable metadata.
        if (dataFile == null || !dataFile.isTabularData()) {
            continue;
        }
        if (!dataDscrOpened) {
            // Open <dataDscr> on first tabular file only.
            xmlw.writeStartElement("dataDscr");
            dataDscrOpened = true;
        }
        for (DataVariable dataVariable : dataFile.getDataTable().getDataVariables()) {
            createVarDDI(xmlw, dataVariable);
        }
    }
    if (dataDscrOpened) {
        // </dataDscr>
        xmlw.writeEndElement();
    }
}
Aggregations