Use of edu.harvard.iq.dataverse.dataaccess.DataAccessRequest in project dataverse by IQSS.
From the class RemoteDataFrameService, method runDataPreprocessing.
/**
 * Runs the R preprocessing script on a tabular data file via Rserve and
 * returns the resulting JSON as a local file.
 *
 * <p>The method uploads the file's contents (preceded by the variable
 * header, when the storage layer supplies one) to a temp file on the
 * Rserve side, evaluates the preprocessing script there, writes the
 * result to a second remote temp file, transfers that file back, and
 * removes it from the remote side.
 *
 * @param dataFile the data file to preprocess
 * @return the local file produced by {@code transferRemoteFile}, or
 *         {@code null} if {@code dataFile} is not tabular, its storage
 *         or input stream is unavailable, or any error occurs while
 *         talking to Rserve
 */
public File runDataPreprocessing(DataFile dataFile) {
    if (!dataFile.isTabularData()) {
        return null;
    }

    RConnection c = null;
    try {
        // Set up an Rserve connection
        c = new RConnection(RSERVE_HOST, RSERVE_PORT);
        c.login(RSERVE_USER, RSERVE_PWD);

        // The working directories must be checked *before* we try to
        // create any files there.
        setupWorkingDirectory(c);

        // Send the tabular data file to the Rserve side:
        DataAccessRequest daReq = new DataAccessRequest();
        StorageIO<DataFile> accessObject = DataAccess.getStorageIO(dataFile, daReq);
        if (accessObject == null) {
            return null;
        }
        accessObject.open();

        // try-with-resources guarantees both streams are closed even if
        // the copy fails part-way through.
        try (InputStream is = accessObject.getInputStream()) {
            if (is == null) {
                return null;
            }
            // Create the output stream on the remote, R end:
            try (RFileOutputStream os = c.createFile(tempFileNameIn)) {
                // Subsettable files: the variable header goes first.
                String varHeader = accessObject.getVarHeader();
                if (varHeader != null) {
                    os.write(varHeader.getBytes());
                }
                byte[] bffr = new byte[4 * 8192];
                int bufsize;
                while ((bufsize = is.read(bffr)) != -1) {
                    os.write(bffr, 0, bufsize);
                }
            }
        }

        // Rserve code starts here
        dbgLog.fine("wrkdir=" + RSERVE_TMP_DIR);

        // Locate the R code and run it on the temp file we've just
        // created:
        String loadlib = "library(rjson)";
        c.voidEval(loadlib);

        String rscript = readLocalResource(DATAVERSE_R_PREPROCESSING);
        // Only a short prefix is logged; clamp the end index so a script
        // shorter than 64 characters does not abort the whole run.
        dbgLog.fine("preprocessing R code: " + rscript.substring(0, Math.min(64, rscript.length())));
        c.voidEval(rscript);

        String runPreprocessing = "json<-preprocess(filename=\"" + tempFileNameIn + "\")";
        dbgLog.fine("data preprocessing command: " + runPreprocessing);
        c.voidEval(runPreprocessing);

        // Save the output in a temp file:
        String saveResult = "write(json, file='" + tempFileNameOut + "')";
        dbgLog.fine("data preprocessing save command: " + saveResult);
        c.voidEval(saveResult);

        // Finally, transfer the saved file back on the application side:
        int fileSize = getFileSize(c, tempFileNameOut);
        File preprocessedDataFile = transferRemoteFile(c, tempFileNameOut, PREPROCESS_FILE_PREFIX, "json", fileSize);

        // Clean up the remote result file now that we have a local copy.
        String deleteLine = "file.remove('" + tempFileNameOut + "')";
        c.eval(deleteLine);

        return preprocessedDataFile;
    } catch (RserveException rse) {
        // RserveException (Rserve is not running maybe?)
        // TODO: *ABSOLUTELY* need more diagnostics here!
        dbgLog.log(java.util.logging.Level.WARNING, "Rserve error during data preprocessing", rse);
        return null;
    } catch (Exception ex) {
        dbgLog.log(java.util.logging.Level.WARNING, "Unexpected error during data preprocessing", ex);
        return null;
    } finally {
        // Always release the Rserve connection, including on the early
        // returns and error paths above.
        if (c != null) {
            c.close();
        }
    }
}
Aggregations