
Example 11 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class S3AccessIO method savePath.

// StorageIO method for copying a local Path (for example, a temp file) into this DataAccess location:
@Override
public void savePath(Path fileSystemPath) throws IOException {
    long newFileSize = -1;
    if (!this.canWrite()) {
        open(DataAccessOption.WRITE_ACCESS);
    }
    try {
        File inputFile = fileSystemPath.toFile();
        if (dvObject instanceof DataFile) {
            s3.putObject(new PutObjectRequest(bucketName, key, inputFile));
            newFileSize = inputFile.length();
        } else {
            throw new IOException("DvObject type other than datafile is not yet supported");
        }
    } catch (SdkClientException ioex) {
        String failureMsg = ioex.getMessage();
        if (failureMsg == null) {
            failureMsg = "S3AccessIO: Unknown exception occured while uploading a local file into S3Object";
        }
        throw new IOException(failureMsg);
    }
    // if it has uploaded successfully, we can reset the size
    // of the object:
    setSize(newFileSize);
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) SdkClientException(com.amazonaws.SdkClientException) IOException(java.io.IOException) File(java.io.File) PutObjectRequest(com.amazonaws.services.s3.model.PutObjectRequest)
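
In practice this method is reached through the DataAccess factory rather than by constructing an S3AccessIO directly. A minimal usage sketch, assuming an already-persisted DataFile; the class and method names in the sketch are illustrative, not part of the Dataverse sources:

import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
import edu.harvard.iq.dataverse.dataaccess.DataAccessRequest;
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
import java.io.IOException;
import java.nio.file.Path;

// Hypothetical helper, not part of the Dataverse sources:
public class SavePathSketch {
    // Upload a local temp file as the new content of an existing DataFile.
    static void uploadTempFile(DataFile dataFile, Path tempFilePath) throws IOException {
        StorageIO<DataFile> storageIO = DataAccess.getStorageIO(dataFile, new DataAccessRequest());
        // savePath() requests WRITE_ACCESS if needed, puts the object into
        // the configured bucket, and resets the stored size on success:
        storageIO.savePath(tempFilePath);
    }
}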

Example 12 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class StoredOriginalFile method retreive.

public static StorageIO<DataFile> retreive(StorageIO<DataFile> storageIO) {
    String originalMimeType;
    DataFile dataFile = storageIO.getDataFile();
    if (dataFile == null) {
        return null;
    }
    if (dataFile.getDataTable() != null) {
        originalMimeType = dataFile.getDataTable().getOriginalFileFormat();
    } else {
        return null;
    }
    long storedOriginalSize;
    InputStreamIO inputStreamIO;
    try {
        storageIO.open();
        Channel storedOriginalChannel = storageIO.openAuxChannel(SAVED_ORIGINAL_FILENAME_EXTENSION);
        storedOriginalSize = storageIO.getAuxObjectSize(SAVED_ORIGINAL_FILENAME_EXTENSION);
        inputStreamIO = new InputStreamIO(Channels.newInputStream((ReadableByteChannel) storedOriginalChannel), storedOriginalSize);
        logger.fine("Opened stored original file as Aux " + SAVED_ORIGINAL_FILENAME_EXTENSION);
    } catch (IOException ioEx) {
        // The original file was not saved, or could not be opened.
        logger.fine("Failed to open stored original file as Aux " + SAVED_ORIGINAL_FILENAME_EXTENSION + "!");
        return null;
    }
    if (originalMimeType != null && !originalMimeType.isEmpty()) {
        if (originalMimeType.matches("application/x-dvn-.*-zip")) {
            inputStreamIO.setMimeType("application/zip");
        } else {
            inputStreamIO.setMimeType(originalMimeType);
        }
    } else {
        inputStreamIO.setMimeType("application/x-unknown");
    }
    String fileName = storageIO.getFileName();
    if (fileName != null) {
        if (originalMimeType != null) {
            String origFileExtension = generateOriginalExtension(originalMimeType);
            inputStreamIO.setFileName(fileName.replaceAll(".tab$", origFileExtension));
        } else {
            inputStreamIO.setFileName(fileName.replaceAll(".tab$", ""));
        }
    }
    return inputStreamIO;
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) ReadableByteChannel(java.nio.channels.ReadableByteChannel) Channel(java.nio.channels.Channel) IOException(java.io.IOException)
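
Callers treat a null return as "no stored original" and fall back to the ingested tabular version (see Example 14, where the result of retreive() is assigned back to storageIO). A small sketch of that fallback pattern; the helper class and method names are ours:

import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
import edu.harvard.iq.dataverse.dataaccess.StoredOriginalFile;

public class StoredOriginalSketch {
    // Prefer the stored original when present; otherwise serve the
    // ingested tabular version. retreive() returns null when the file
    // has no DataTable or the saved-original aux object cannot be opened.
    static StorageIO<DataFile> preferStoredOriginal(StorageIO<DataFile> storageIO) {
        StorageIO<DataFile> original = StoredOriginalFile.retreive(storageIO);
        return (original != null) ? original : storageIO;
    }
}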

Example 13 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class RemoteDataFrameService method execute.

/*
     * Execute a data frame creation process:
     * 
     * (TODO: describe the process here; -- L.A. 4.0 alpha 1)
     *
     * @param sro    an RJobRequest object that contains various parameters
     * @return    a Map that contains various information about the results
     *
     * TODO: replace this Map with a dedicated RJobResult object; -- L.A. 4.0 alpha 1
     */
public Map<String, String> execute(RJobRequest sro) {
    dbgLog.fine("RemoteDataFrameService: execute() starts here.");
    // set the return object
    Map<String, String> result = new HashMap<String, String>();
    try {
        // TODO:
        // Split the code below into neat individual methods - for
        // initializing the connection, loading the remote libraries,
        // creating remote R vectors for the parameters that will be used
        // to create the data frame - variable names, labels, etc., and
        // executing the main request and any necessary post-processing
        // -- L.A. 4.0 alpha 1
        // Set up an Rserve connection
        dbgLog.fine("sro dump:\n" + ToStringBuilder.reflectionToString(sro, ToStringStyle.MULTI_LINE_STYLE));
        dbgLog.fine("RSERVE_USER=" + RSERVE_USER + "[default=rserve]");
        dbgLog.fine("RSERVE_PASSWORD=" + RSERVE_PWD + "[default=rserve]");
        dbgLog.fine("RSERVE_PORT=" + RSERVE_PORT + "[default=6311]");
        dbgLog.fine("RSERVE_HOST=" + RSERVE_HOST);
        RConnection c = new RConnection(RSERVE_HOST, RSERVE_PORT);
        c.login(RSERVE_USER, RSERVE_PWD);
        dbgLog.info(">" + c.eval("R.version$version.string").asString() + "<");
        // check working directories
        // This needs to be done *before* we try to create any files
        // there!
        setupWorkingDirectory(c);
        // send the data file to the Rserve side:
        String infile = sro.getTabularDataFileName();
        InputStream inb = new BufferedInputStream(new FileInputStream(infile));
        int bufsize;
        byte[] bffr = new byte[1024];
        RFileOutputStream os = c.createFile(tempFileNameIn);
        while ((bufsize = inb.read(bffr)) != -1) {
            os.write(bffr, 0, bufsize);
        }
        os.close();
        inb.close();
        // Rserve code starts here
        dbgLog.fine("wrkdir=" + RSERVE_TMP_DIR);
        String RversionLine = "R.Version()$version.string";
        String Rversion = c.eval(RversionLine).asString();
        // We need to initialize our R session:
        // send custom R code library over to the Rserve and load the code:
        String rscript = readLocalResource(DATAVERSE_R_FUNCTIONS);
        c.voidEval(rscript);
        dbgLog.fine("raw variable type=" + sro.getVariableTypes());
        c.assign("vartyp", new REXPInteger(sro.getVariableTypes()));
        String[] tmpt = c.eval("vartyp").asStrings();
        dbgLog.fine("vartyp length=" + tmpt.length + "\t " + StringUtils.join(tmpt, ","));
        // variable *formats* - not to be confused with variable *types*!
        // these specify extra, optional format specifications - for example,
        // String variables may represent date and time values.
        Map<String, String> tmpFmt = sro.getVariableFormats();
        dbgLog.fine("tmpFmt=" + tmpFmt);
        if (tmpFmt != null) {
            Set<String> vfkeys = tmpFmt.keySet();
            String[] tmpfk = (String[]) vfkeys.toArray(new String[vfkeys.size()]);
            String[] tmpfv = getValueSet(tmpFmt, tmpfk);
            c.assign("tmpfk", new REXPString(tmpfk));
            c.assign("tmpfv", new REXPString(tmpfv));
            String fmtNamesLine = "names(tmpfv)<- tmpfk";
            c.voidEval(fmtNamesLine);
            String fmtValuesLine = "varFmt<- as.list(tmpfv)";
            c.voidEval(fmtValuesLine);
        } else {
            String[] varFmtN = {};
            List<String> varFmtV = new ArrayList<String>();
            c.assign("varFmt", new REXPList(new RList(varFmtV, varFmtN)));
        }
        // Variable names:
        String[] jvnamesRaw = sro.getVariableNames();
        String[] jvnames = null;
        if (sro.hasUnsafeVariableNames) {
            // create  list
            jvnames = sro.safeVarNames;
            dbgLog.fine("renamed=" + StringUtils.join(jvnames, ","));
        } else {
            jvnames = jvnamesRaw;
        }
        c.assign("vnames", new REXPString(jvnames));
        // confirm:
        String[] tmpjvnames = c.eval("vnames").asStrings();
        dbgLog.fine("vnames:" + StringUtils.join(tmpjvnames, ","));
        // read.dataverseTabData method, from dataverse_r_functions.R,
        // uses R's standard scan() function to read the tabular data we've
        // just transferred over and turn it into a dataframe. It adds some
        // custom post-processing too - restores missing values, converts
        // strings representing dates and times into R date and time objects,
        // and more.
        // Parameters for the read.dataverseTabData method executed on the R side:
        // file -> tempFileName
        // col.names -> Arrays.deepToString(new REXPString(jvnames)).asStrings())
        // colClassesx -> Arrays.deepToString((new REXPInteger(sro.getVariableTypes())).asStrings())
        // varFormat -> Arrays.deepToString((new REXPString(getValueSet(tmpFmt, tmpFmt.keySet().toArray(new String[tmpFmt.keySet().size()])))).asStrings())
        dbgLog.fine("read.dataverseTabData parameters:");
        dbgLog.fine("col.names = " + Arrays.deepToString((new REXPString(jvnames)).asStrings()));
        dbgLog.fine("colClassesx = " + Arrays.deepToString((new REXPInteger(sro.getVariableTypes())).asStrings()));
        dbgLog.fine("varFormat = " + Arrays.deepToString((new REXPString(getValueSet(tmpFmt, tmpFmt.keySet().toArray(new String[tmpFmt.keySet().size()])))).asStrings()));
        String readtableline = "x<-read.dataverseTabData(file='" + tempFileNameIn + "', col.names=vnames, colClassesx=vartyp, varFormat=varFmt )";
        dbgLog.fine("readtable=" + readtableline);
        c.voidEval(readtableline);
        if (sro.hasUnsafeVariableNames) {
            dbgLog.fine("unsafeVariableNames exist");
            jvnames = sro.safeVarNames;
            String[] rawNameSet = sro.renamedVariableArray;
            String[] safeNameSet = sro.renamedResultArray;
            c.assign("tmpRN", new REXPString(rawNameSet));
            c.assign("tmpSN", new REXPString(safeNameSet));
            String raw2safevarNameTableLine = "names(tmpRN)<- tmpSN";
            c.voidEval(raw2safevarNameTableLine);
            String attrRsafe2rawLine = "attr(x, 'Rsafe2raw')<- as.list(tmpRN)";
            c.voidEval(attrRsafe2rawLine);
        } else {
            String attrRsafe2rawLine = "attr(x, 'Rsafe2raw')<-list();";
            c.voidEval(attrRsafe2rawLine);
        }
        // Restore NAs (missing values) in the data frame:
        // (these are encoded as empty strings in dataverse tab files)
        // Why are we doing it here? And not in the dataverse_r_functions.R
        // fragment?
        String asIsline = "for (i in 1:dim(x)[2]){ " + "if (attr(x,'var.type')[i] == 0) {" + "x[[i]]<-I(x[[i]]);  x[[i]][ x[[i]] == '' ]<-NA  }}";
        c.voidEval(asIsline);
        String[] varLabels = sro.getVariableLabels();
        c.assign("varlabels", new REXPString(varLabels));
        String attrVarLabelsLine = "attr(x, 'var.labels')<-varlabels";
        c.voidEval(attrVarLabelsLine);
        // Confirm:
        String[] vlbl = c.eval("attr(x, 'var.labels')").asStrings();
        dbgLog.fine("varlabels=" + StringUtils.join(vlbl, ","));
        // create the VALTABLE and VALORDER lists:
        c.voidEval("VALTABLE<-list()");
        c.voidEval("VALORDER<-list()");
        // In the fragment below, we'll populate the VALTABLE list that we've
        // just created with the actual values and labels of our categorical variables.
        // TODO:
        // This code has been imported from the DVN v2-3
        // implementation. I keep wondering if there is a simpler way to
        // achieve this - to pass these maps of values and labels to R
        // in fewer steps/with less code - ?
        // -- L.A. 4.3
        Map<String, Map<String, String>> vltbl = sro.getValueTable();
        Map<String, List<String>> orderedCategoryValues = sro.getCategoryValueOrders();
        String[] variableIds = sro.getVariableIds();
        for (int j = 0; j < variableIds.length; j++) {
            // if this variable has a value-label table,
            // pass its key and value arrays to Rserve;
            // finalize a value-table on the Rserve side:
            String varId = variableIds[j];
            if (vltbl.containsKey(varId)) {
                Map<String, String> tmp = (HashMap<String, String>) vltbl.get(varId);
                Set<String> vlkeys = tmp.keySet();
                String[] tmpk = (String[]) vlkeys.toArray(new String[vlkeys.size()]);
                String[] tmpv = getValueSet(tmp, tmpk);
                dbgLog.fine("tmp:k=" + StringUtils.join(tmpk, ","));
                dbgLog.fine("tmp:v=" + StringUtils.join(tmpv, ","));
                // index number starts from 1 (not 0):
                int indx = j + 1;
                dbgLog.fine("index=" + indx);
                if (tmpv.length > 0) {
                    c.assign("tmpk", new REXPString(tmpk));
                    c.assign("tmpv", new REXPString(tmpv));
                    String namesValueLine = "names(tmpv)<- tmpk";
                    c.voidEval(namesValueLine);
                    String sbvl = "VALTABLE[['" + Integer.toString(indx) + "']]" + "<- as.list(tmpv)";
                    dbgLog.fine("frag=" + sbvl);
                    c.voidEval(sbvl);
                    // confirmation test for j-th variable name
                    REXP jl = c.parseAndEval(sbvl);
                    dbgLog.fine("jl(" + j + ") = " + jl);
                }
            }
            if (orderedCategoryValues != null && orderedCategoryValues.containsKey(varId)) {
                int indx = j + 1;
                List<String> orderList = orderedCategoryValues.get(varId);
                if (orderList != null) {
                    String[] ordv = (String[]) orderList.toArray(new String[orderList.size()]);
                    dbgLog.fine("ordv=" + StringUtils.join(ordv, ","));
                    c.assign("ordv", new REXPString(ordv));
                    String sbvl = "VALORDER[['" + Integer.toString(indx) + "']]" + "<- as.list(ordv)";
                    dbgLog.fine("VALORDER[...]=" + sbvl);
                    c.voidEval(sbvl);
                } else {
                    dbgLog.fine("NULL orderedCategoryValues list.");
                }
            }
        }
        // And now we store the VALTABLE and MSVLTBL as attributes of the
        // dataframe we are cooking:
        dbgLog.fine("length of vl=" + c.eval("length(VALTABLE)").asInteger());
        String attrValTableLine = "attr(x, 'val.table')<-VALTABLE";
        c.voidEval(attrValTableLine);
        String msvStartLine = "MSVLTBL<-list();";
        c.voidEval(msvStartLine);
        String attrMissvalLine = "attr(x, 'missval.table')<-MSVLTBL";
        c.voidEval(attrMissvalLine);
        // But we are not done with these value label maps... We now need
        // to call these methods from the dataverse_r_functions.R script
        // to further process the lists. Among other things, they will
        // create these new lists - value index and missing value index, that
        // simply indicate which variables have any of the above; these will
        // also be saved as attributes of the data frame, val.index and
        // missval.index respectively. But, also, the methods will reprocess
        // and overwrite the val.table and missval.table attributes already stored in
        // the dataframe. I don't fully understand why that is necessary, or what it is
        // that we are actually adding to the lists there... Another TODO: ?
        String createVIndexLine = "x<-createvalindex(dtfrm=x, attrname='val.index');";
        c.voidEval(createVIndexLine);
        String createMVIndexLine = "x<-createvalindex(dtfrm=x, attrname='missval.index');";
        c.voidEval(createMVIndexLine);
        // And now we'll call the last method from the R script - createDataverseDataFrame();
        // It should probably be renamed; the dataframe has already been created.
        // What this method does is go through the frame and change the
        // vectors representing categorical variables into R factors.
        // For example, suppose this tabular file was produced from a Stata file
        // that had a categorical variable in which "Male" and "Female" were represented
        // with 0 and 1. In the Dataverse database, the string values "Male" and
        // "Female" are now stored as "categorical value labels". And the column
        // in the tab file has numeric 1 and 0s. That's what the R
        // dataframe was created from, so it now has a numeric vector of 1s and 0s
        // representing this variable. So in this step we are going
        // to change this vector into a factor, using the labels and values
        // that we already passed over via Rserve and stored in the val.table, above.
        // TODO:
        // I'm going to propose that we go back to what we used to do back in
        // DVN 2-3.* - instead of giving the user a single dataframe (.RData)
        // file, provide a zip file, with the data frame, and also a README
        // file with some documentation explaining how the data frame was
        // created, and pointing out some potential issues stemming from the
        // conversion between formats. Converting Stata categoricals into
        // R factors is one such issue (if nothing else, do note that
        // the UNF of the datafile with the column described in the example
        // above will change, if the resulting R dataframe is reingested! See
        // the UNF documentation for more info...). We may also make this
        // download interactive - giving the user some options for how
        // to handle the conversion (so, another choice would be to convert
        // the above to a factor of "0" and "1"s), etc.
        // -- L.A. 4.3
        String dataFileName = "Data." + PID + "." + sro.getFormatRequested();
        // data file to be copied back to the dvn
        String dsnprfx = RSERVE_TMP_DIR + "/" + dataFileName;
        String dataverseDataFrameCommand = "createDataverseDataFrame(dtfrm=x," + "dwnldoptn='" + sro.getFormatRequested() + "'" + ", dsnprfx='" + dsnprfx + "')";
        c.voidEval(dataverseDataFrameCommand);
        int wbFileSize = getFileSize(c, dsnprfx);
        dbgLog.fine("wbFileSize=" + wbFileSize);
        // If the above succeeded, the dataframe has been saved on the
        // Rserve side as an .Rdata file. Now we can transfer it back to the
        // dataverse side:
        File localDataFrameFile = transferRemoteFile(c, dsnprfx, RWRKSP_FILE_PREFIX, "RData", wbFileSize);
        result.put("dataFrameFileName", localDataFrameFile.getAbsolutePath());
        if (localDataFrameFile != null) {
            dbgLog.fine("data frame file name: " + localDataFrameFile.getAbsolutePath());
        } else {
            dbgLog.fine("data frame file is null!");
        // throw an exception??
        }
        result.put("Rversion", Rversion);
        dbgLog.fine("result object (before closing the Rserve):\n" + result);
        String deleteLine = "file.remove('" + tempFileNameIn + "')";
        c.eval(deleteLine);
        c.close();
    } catch (RserveException rse) {
        // RserveException (Rserve is not running maybe?)
        // TODO: *ABSOLUTELY* need more diagnostics here!
        rse.printStackTrace();
        result.put("RexecError", "true");
        return result;
    } catch (REXPMismatchException mme) {
        mme.printStackTrace();
        result.put("RexecError", "true");
        return result;
    } catch (IOException ie) {
        ie.printStackTrace();
        result.put("RexecError", "true");
        return result;
    } catch (Exception ex) {
        ex.printStackTrace();
        result.put("RexecError", "true");
        return result;
    }
    return result;
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile)
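
The local-file-to-Rserve transfer near the top of execute() is the most reusable fragment here. Distilled into a standalone sketch; the class and method names are ours, and only the RConnection.createFile() call and the stream-copy loop come from the example:

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.rosuda.REngine.Rserve.RConnection;

public class RserveTransferSketch {
    // Copy a local file into the Rserve working directory, 1KB at a time,
    // mirroring the transfer step in execute() above.
    static void copyToRserve(RConnection c, String localPath, String remoteName) throws IOException {
        try (InputStream in = new BufferedInputStream(new FileInputStream(localPath));
             OutputStream out = c.createFile(remoteName)) {
            byte[] buffer = new byte[1024];
            int n;
            while ((n = in.read(buffer)) != -1) {
                out.write(buffer, 0, n);
            }
        }
    }
}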

Example 14 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class DownloadInstanceWriter method writeTo.

@Override
public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[] annotation, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream outstream) throws IOException, WebApplicationException {
    if (di.getDownloadInfo() != null && di.getDownloadInfo().getDataFile() != null) {
        DataAccessRequest daReq = new DataAccessRequest();
        DataFile dataFile = di.getDownloadInfo().getDataFile();
        StorageIO<DataFile> storageIO = DataAccess.getStorageIO(dataFile, daReq);
        if (storageIO != null) {
            storageIO.open();
            if (di.getConversionParam() != null) {
                if (di.getConversionParam().equals("imageThumb") && !dataFile.isHarvested()) {
                    if ("".equals(di.getConversionParamValue())) {
                        storageIO = ImageThumbConverter.getImageThumbnailAsInputStream(storageIO, ImageThumbConverter.DEFAULT_THUMBNAIL_SIZE);
                    } else {
                        try {
                            int size = Integer.parseInt(di.getConversionParamValue());
                            if (size > 0) {
                                storageIO = ImageThumbConverter.getImageThumbnailAsInputStream(storageIO, size);
                            }
                        } catch (java.lang.NumberFormatException ex) {
                            storageIO = ImageThumbConverter.getImageThumbnailAsInputStream(storageIO, ImageThumbConverter.DEFAULT_THUMBNAIL_SIZE);
                        }
                        // and, since we now have tabular data files that can
                        // have thumbnail previews... obviously, we don't want to
                        // add the variable header to the image stream!
                        storageIO.setNoVarHeader(Boolean.TRUE);
                        storageIO.setVarHeader(null);
                    }
                } else if (dataFile.isTabularData()) {
                    logger.fine("request for tabular data download;");
                    if (di.getConversionParam().equals("noVarHeader")) {
                        logger.fine("tabular data with no var header requested");
                        storageIO.setNoVarHeader(Boolean.TRUE);
                        storageIO.setVarHeader(null);
                    } else if (di.getConversionParam().equals("format")) {
                        if ("original".equals(di.getConversionParamValue())) {
                            logger.fine("stored original of an ingested file requested");
                            storageIO = StoredOriginalFile.retreive(storageIO);
                        } else {
                            // Other format conversions:
                            logger.fine("format conversion on a tabular file requested (" + di.getConversionParamValue() + ")");
                            String requestedMimeType = di.getServiceFormatType(di.getConversionParam(), di.getConversionParamValue());
                            if (requestedMimeType == null) {
                                // default mime type, in case real type is unknown;
                                // (this shouldn't happen in real life - but just in case):
                                requestedMimeType = "application/octet-stream";
                            }
                            storageIO = DataConverter.performFormatConversion(dataFile, storageIO, di.getConversionParamValue(), requestedMimeType);
                        }
                    } else if (di.getConversionParam().equals("subset")) {
                        logger.fine("processing subset request.");
                        if (di.getExtraArguments() != null && di.getExtraArguments().size() > 0) {
                            logger.fine("processing extra arguments list of length " + di.getExtraArguments().size());
                            List<Integer> variablePositionIndex = new ArrayList<>();
                            String subsetVariableHeader = null;
                            for (int i = 0; i < di.getExtraArguments().size(); i++) {
                                DataVariable variable = (DataVariable) di.getExtraArguments().get(i);
                                if (variable != null) {
                                    if (variable.getDataTable().getDataFile().getId().equals(dataFile.getId())) {
                                        logger.fine("adding variable id " + variable.getId() + " to the list.");
                                        variablePositionIndex.add(variable.getFileOrder());
                                        if (subsetVariableHeader == null) {
                                            subsetVariableHeader = variable.getName();
                                        } else {
                                            subsetVariableHeader = subsetVariableHeader.concat("\t");
                                            subsetVariableHeader = subsetVariableHeader.concat(variable.getName());
                                        }
                                    } else {
                                        logger.warning("variable does not belong to this data file.");
                                    }
                                }
                            }
                            if (variablePositionIndex.size() > 0) {
                                try {
                                    File tempSubsetFile = File.createTempFile("tempSubsetFile", ".tmp");
                                    TabularSubsetGenerator tabularSubsetGenerator = new TabularSubsetGenerator();
                                    tabularSubsetGenerator.subsetFile(storageIO.getInputStream(), tempSubsetFile.getAbsolutePath(), variablePositionIndex, dataFile.getDataTable().getCaseQuantity(), "\t");
                                    if (tempSubsetFile.exists()) {
                                        FileInputStream subsetStream = new FileInputStream(tempSubsetFile);
                                        long subsetSize = tempSubsetFile.length();
                                        InputStreamIO subsetStreamIO = new InputStreamIO(subsetStream, subsetSize);
                                        logger.fine("successfully created subset output stream.");
                                        subsetVariableHeader = subsetVariableHeader.concat("\n");
                                        subsetStreamIO.setVarHeader(subsetVariableHeader);
                                        String tabularFileName = storageIO.getFileName();
                                        if (tabularFileName != null && tabularFileName.endsWith(".tab")) {
                                            tabularFileName = tabularFileName.replaceAll("\\.tab$", "-subset.tab");
                                        } else if (tabularFileName != null && !"".equals(tabularFileName)) {
                                            tabularFileName = tabularFileName.concat("-subset.tab");
                                        } else {
                                            tabularFileName = "subset.tab";
                                        }
                                        subsetStreamIO.setFileName(tabularFileName);
                                        subsetStreamIO.setMimeType(storageIO.getMimeType());
                                        storageIO = subsetStreamIO;
                                    } else {
                                        storageIO = null;
                                    }
                                } catch (IOException ioex) {
                                    storageIO = null;
                                }
                            }
                        } else {
                            logger.fine("empty list of extra arguments.");
                        }
                    }
                }
                if (storageIO == null) {
                    throw new WebApplicationException(Response.Status.SERVICE_UNAVAILABLE);
                }
            } else {
                if (storageIO instanceof S3AccessIO && !(dataFile.isTabularData()) && isRedirectToS3()) {
                    // [attempt to] redirect:
                    String redirect_url_str = ((S3AccessIO) storageIO).generateTemporaryS3Url();
                    // better exception handling here?
                    logger.info("Data Access API: direct S3 url: " + redirect_url_str);
                    URI redirect_uri;
                    try {
                        redirect_uri = new URI(redirect_url_str);
                    } catch (URISyntaxException ex) {
                        logger.info("Data Access API: failed to create S3 redirect url (" + redirect_url_str + ")");
                        redirect_uri = null;
                    }
                    if (redirect_uri != null) {
                        // definitely close the (still open) S3 input stream,
                        // since we are not going to use it. The S3 documentation
                        // emphasizes that it is very important not to leave these
                        // lying around un-closed, since they are going to fill
                        // up the S3 connection pool!
                        storageIO.getInputStream().close();
                        // increment the download count, if necessary:
                        if (di.getGbr() != null) {
                            try {
                                logger.fine("writing guestbook response, for an S3 download redirect.");
                                Command<?> cmd = new CreateGuestbookResponseCommand(di.getDataverseRequestService().getDataverseRequest(), di.getGbr(), di.getGbr().getDataFile().getOwner());
                                di.getCommand().submit(cmd);
                            } catch (CommandException e) {
                            }
                        }
                        // finally, issue the redirect:
                        Response response = Response.seeOther(redirect_uri).build();
                        logger.info("Issuing redirect to the file location on S3.");
                        throw new RedirectionException(response);
                    }
                }
            }
            InputStream instream = storageIO.getInputStream();
            if (instream != null) {
                // headers:
                String fileName = storageIO.getFileName();
                String mimeType = storageIO.getMimeType();
                // Provide both the "Content-disposition" and "Content-Type" headers,
                // to satisfy the widest selection of browsers out there.
                httpHeaders.add("Content-disposition", "attachment; filename=\"" + fileName + "\"");
                httpHeaders.add("Content-Type", mimeType + "; name=\"" + fileName + "\"");
                long contentSize;
                boolean useChunkedTransfer = false;
                // if ((contentSize = getFileSize(di, storageIO.getVarHeader())) > 0) {
                if ((contentSize = getContentSize(storageIO)) > 0) {
                    logger.fine("Content size (retrieved from the AccessObject): " + contentSize);
                    httpHeaders.add("Content-Length", contentSize);
                } else {
                // httpHeaders.add("Transfer-encoding", "chunked");
                // useChunkedTransfer = true;
                }
                // (the httpHeaders map must be modified *before* writing any
                // data in the output stream!)
                int bufsize;
                byte[] bffr = new byte[4 * 8192];
                byte[] chunkClose = "\r\n".getBytes();
                if (storageIO.getVarHeader() != null) {
                    if (storageIO.getVarHeader().getBytes().length > 0) {
                        if (useChunkedTransfer) {
                            String chunkSizeLine = String.format("%x\r\n", storageIO.getVarHeader().getBytes().length);
                            outstream.write(chunkSizeLine.getBytes());
                        }
                        outstream.write(storageIO.getVarHeader().getBytes());
                        if (useChunkedTransfer) {
                            outstream.write(chunkClose);
                        }
                    }
                }
                while ((bufsize = instream.read(bffr)) != -1) {
                    if (useChunkedTransfer) {
                        String chunkSizeLine = String.format("%x\r\n", bufsize);
                        outstream.write(chunkSizeLine.getBytes());
                    }
                    outstream.write(bffr, 0, bufsize);
                    if (useChunkedTransfer) {
                        outstream.write(chunkClose);
                    }
                }
                if (useChunkedTransfer) {
                    String chunkClosing = "0\r\n\r\n";
                    outstream.write(chunkClosing.getBytes());
                }
                logger.fine("di conversion param: " + di.getConversionParam() + ", value: " + di.getConversionParamValue());
                if (di.getGbr() != null && !(isThumbnailDownload(di) || isPreprocessedMetadataDownload(di))) {
                    try {
                        logger.fine("writing guestbook response.");
                        Command<?> cmd = new CreateGuestbookResponseCommand(di.getDataverseRequestService().getDataverseRequest(), di.getGbr(), di.getGbr().getDataFile().getOwner());
                        di.getCommand().submit(cmd);
                    } catch (CommandException e) {
                    }
                } else {
                    logger.fine("not writing guestbook response");
                }
                instream.close();
                outstream.close();
                return;
            }
        }
    }
    throw new WebApplicationException(Response.Status.NOT_FOUND);
}
Also used : CreateGuestbookResponseCommand(edu.harvard.iq.dataverse.engine.command.impl.CreateGuestbookResponseCommand) WebApplicationException(javax.ws.rs.WebApplicationException) DataVariable(edu.harvard.iq.dataverse.datavariable.DataVariable) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) DataFile(edu.harvard.iq.dataverse.DataFile) ArrayList(java.util.ArrayList) List(java.util.List) RedirectionException(javax.ws.rs.RedirectionException) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) IOException(java.io.IOException) CommandException(edu.harvard.iq.dataverse.engine.command.exception.CommandException) Response(javax.ws.rs.core.Response) File(java.io.File)
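
The useChunkedTransfer scaffolding in the method above is effectively disabled (the flag is never set to true), but what it implements is standard HTTP/1.1 chunked framing. A self-contained sketch of that framing, for reference:

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

public class ChunkedFramingSketch {
    // Copy a stream using HTTP/1.1 chunked framing: each chunk is
    // "<size-in-hex>\r\n" + payload + "\r\n"; the zero-length chunk
    // "0\r\n\r\n" terminates the body.
    static void writeChunked(InputStream in, OutputStream out) throws IOException {
        byte[] buffer = new byte[4 * 8192];
        int n;
        while ((n = in.read(buffer)) != -1) {
            out.write(String.format("%x\r\n", n).getBytes());
            out.write(buffer, 0, n);
            out.write("\r\n".getBytes());
        }
        out.write("0\r\n\r\n".getBytes());
    }
}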

Example 15 with DataFile

use of edu.harvard.iq.dataverse.DataFile in project dataverse by IQSS.

the class Meta method datafile.

@Path("datafile/{fileId}")
@GET
@Produces({ "text/xml" })
public String datafile(@PathParam("fileId") Long fileId, @QueryParam("exclude") String exclude, @QueryParam("include") String include, @Context HttpHeaders header, @Context HttpServletResponse response) throws NotFoundException, ServiceUnavailableException /*, PermissionDeniedException, AuthorizationRequiredException*/
{
    String retValue = "";
    DataFile dataFile = null;
    // httpHeaders.add("Content-disposition", "attachment; filename=\"dataverse_files.zip\"");
    // httpHeaders.add("Content-Type", "application/zip; name=\"dataverse_files.zip\"");
    response.setHeader("Content-disposition", "attachment; filename=\"dataverse_files.zip\"");
    dataFile = datafileService.find(fileId);
    if (dataFile == null) {
        throw new NotFoundException();
    }
    String fileName = dataFile.getFileMetadata().getLabel().replaceAll("\\.tab$", "-ddi.xml");
    response.setHeader("Content-disposition", "attachment; filename=\"" + fileName + "\"");
    response.setHeader("Content-Type", "application/xml; name=\"" + fileName + "\"");
    ByteArrayOutputStream outStream = new ByteArrayOutputStream();
    try {
        ddiExportService.exportDataFile(fileId, outStream, exclude, include);
        retValue = outStream.toString();
    } catch (Exception e) {
        // We return Service Unavailable.
        throw new ServiceUnavailableException();
    }
    response.setHeader("Access-Control-Allow-Origin", "*");
    return retValue;
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) NotFoundException(javax.ws.rs.NotFoundException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ServiceUnavailableException(javax.ws.rs.ServiceUnavailableException) Path(javax.ws.rs.Path) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)
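
For completeness, a client-side sketch of calling this endpoint with the standard JAX-RS 2 client API; the base URL and file id are illustrative, not taken from the example:

import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;

public class MetaClientSketch {
    public static void main(String[] args) {
        Client client = ClientBuilder.newClient();
        try {
            // Fetch the DDI metadata export for a data file:
            String ddiXml = client.target("https://demo.dataverse.org/api/meta")
                    .path("datafile/{fileId}")
                    .resolveTemplate("fileId", 42L)
                    .request("text/xml")
                    .get(String.class);
            System.out.println(ddiXml);
        } finally {
            client.close();
        }
    }
}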

Aggregations

DataFile (edu.harvard.iq.dataverse.DataFile) 111
Dataset (edu.harvard.iq.dataverse.Dataset) 39
IOException (java.io.IOException) 39
FileMetadata (edu.harvard.iq.dataverse.FileMetadata) 30
ArrayList (java.util.ArrayList) 25
DatasetVersion (edu.harvard.iq.dataverse.DatasetVersion) 20
File (java.io.File) 20
FileNotFoundException (java.io.FileNotFoundException) 18
Path (javax.ws.rs.Path) 18
Dataverse (edu.harvard.iq.dataverse.Dataverse) 17
FileInputStream (java.io.FileInputStream) 16
AuthenticatedUser (edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser) 14
CommandException (edu.harvard.iq.dataverse.engine.command.exception.CommandException) 13
Date (java.util.Date) 13
GET (javax.ws.rs.GET) 13
Test (org.junit.Test) 13
Timestamp (java.sql.Timestamp) 11
InputStream (java.io.InputStream) 10
DataVariable (edu.harvard.iq.dataverse.datavariable.DataVariable) 8
FileOutputStream (java.io.FileOutputStream) 8