Search in sources :

Example 1 with DataAccessRequest

use of edu.harvard.iq.dataverse.dataaccess.DataAccessRequest in project dataverse by IQSS.

In the class RemoteDataFrameService, the method runDataPreprocessing:

/**
 * Runs the R preprocessing script on a tabular data file via Rserve and
 * returns the resulting JSON as a local file.
 *
 * <p>The data file is streamed to a temporary file on the Rserve side, the
 * preprocessing R code is evaluated against it, the JSON output is written to
 * a second remote temporary file, and that file is transferred back and then
 * removed from the remote side.
 *
 * @param dataFile the data file to preprocess
 * @return the local file holding the preprocessing output, or {@code null}
 *         if {@code dataFile} is not tabular, if its storage or input stream
 *         cannot be obtained, or if any step of the exchange fails
 */
public File runDataPreprocessing(DataFile dataFile) {
    if (!dataFile.isTabularData()) {
        return null;
    }
    // Declared outside the try so that the finally block can always release
    // the connection, including on early returns and on exceptions.
    RConnection c = null;
    try {
        // Set up an Rserve connection
        c = new RConnection(RSERVE_HOST, RSERVE_PORT);
        c.login(RSERVE_USER, RSERVE_PWD);
        // check working directories
        // This needs to be done *before* we try to create any files
        // there!
        setupWorkingDirectory(c);
        // send the tabular data file to the Rserve side:
        DataAccessRequest daReq = new DataAccessRequest();
        StorageIO<DataFile> accessObject = DataAccess.getStorageIO(dataFile, daReq);
        if (accessObject == null) {
            return null;
        }
        accessObject.open();
        // try-with-resources tolerates a null resource, so the null check
        // can stay inside the block; both streams are closed even when the
        // copy fails half-way.
        try (InputStream is = accessObject.getInputStream()) {
            if (is == null) {
                return null;
            }
            // Create the output stream on the remote, R end:
            try (RFileOutputStream os = c.createFile(tempFileNameIn)) {
                // subsettable files:
                String varHeader = accessObject.getVarHeader();
                if (varHeader != null) {
                    // NOTE(review): encoded with the platform default
                    // charset, as before - confirm this is intended.
                    os.write(varHeader.getBytes());
                }
                byte[] bffr = new byte[4 * 8192];
                int bufsize;
                while ((bufsize = is.read(bffr)) != -1) {
                    os.write(bffr, 0, bufsize);
                }
            }
        }
        // Rserve code starts here
        dbgLog.fine("wrkdir=" + RSERVE_TMP_DIR);
        // Locate the R code and run it on the temp file we've just
        // created:
        String loadlib = "library(rjson)";
        c.voidEval(loadlib);
        String rscript = readLocalResource(DATAVERSE_R_PREPROCESSING);
        // Only a preview is logged; clamp the length so that a script shorter
        // than 64 characters does not abort preprocessing with a
        // StringIndexOutOfBoundsException.
        dbgLog.fine("preprocessing R code: " + rscript.substring(0, Math.min(rscript.length(), 64)));
        c.voidEval(rscript);
        String runPreprocessing = "json<-preprocess(filename=\"" + tempFileNameIn + "\")";
        dbgLog.fine("data preprocessing command: " + runPreprocessing);
        c.voidEval(runPreprocessing);
        // Save the output in a temp file:
        String saveResult = "write(json, file='" + tempFileNameOut + "')";
        dbgLog.fine("data preprocessing save command: " + saveResult);
        c.voidEval(saveResult);
        // Finally, transfer the saved file back on the application side:
        int fileSize = getFileSize(c, tempFileNameOut);
        File preprocessedDataFile = transferRemoteFile(c, tempFileNameOut, PREPROCESS_FILE_PREFIX, "json", fileSize);
        String deleteLine = "file.remove('" + tempFileNameOut + "')";
        c.eval(deleteLine);
        return preprocessedDataFile;
    } catch (RserveException rse) {
        // RserveException (Rserve is not running maybe?)
        // TODO: *ABSOLUTELY* need more diagnostics here!
        dbgLog.warning("Rserve error during data preprocessing: " + rse.getMessage());
        rse.printStackTrace();
        return null;
    } catch (Exception ex) {
        dbgLog.warning("data preprocessing failed: " + ex);
        ex.printStackTrace();
        return null;
    } finally {
        if (c != null) {
            c.close();
        }
    }
}
Also used : DataFile(edu.harvard.iq.dataverse.DataFile) DataAccessRequest(edu.harvard.iq.dataverse.dataaccess.DataAccessRequest)

Aggregations

DataFile (edu.harvard.iq.dataverse.DataFile)1 DataAccessRequest (edu.harvard.iq.dataverse.dataaccess.DataAccessRequest)1