use of org.rosuda.REngine.RList in project dataverse by IQSS.
the class RDATAFileReader method getDataFrameInformation.
/**
 * Runs an R script that extracts meta-data from the *original* Rdata
 * object, then parses its output and creates DataVariable objects.
 *
 * The script loads the Rdata file, sets R's working directory to the
 * file's parent directory and then appends RSCRIPT_GET_DATASET and
 * RSCRIPT_DATASET_INFO_SCRIPT. The evaluated result is read as a list with
 * the entries "meta.info", "varNames" and (optionally) "caseQnty".
 *
 * Failures while evaluating the script or reading its result are logged
 * and not propagated; in that case dataTable may be left partially
 * populated.
 */
private void getDataFrameInformation() {
    LOG.fine("RDATAFileReader: Entering `getDataFrameInformation` function");

    String parentDirectory = mRWorkspace.getRdataFile().getParent();
    // NOTE(review): both paths are spliced into the R source without any
    // escaping; a path containing a double quote or a backslash would yield
    // a broken script. Confirm that these paths are always server-generated.
    String fileInfoScript = new StringBuilder()
            .append(String.format("load(\"%s\")\n", mRWorkspace.getRdataAbsolutePath()))
            .append(String.format("setwd(\"%s\")\n", parentDirectory))
            .append(RSCRIPT_GET_DATASET)
            .append("\n")
            .append(RSCRIPT_DATASET_INFO_SCRIPT)
            .toString();

    try {
        RRequest request = mRequestBuilder.build();
        request.script(fileInfoScript);
        RList fileInformation = request.eval().asList();
        RList metaInfo = fileInformation.at("meta.info").asList();
        String[] variableNames = fileInformation.at("varNames").asStrings();

        // Create one DataVariable per column, in the order R reported them.
        List<DataVariable> variableList = new ArrayList<>(variableNames.length);
        for (String varName : variableNames) {
            DataVariable dv = new DataVariable();
            dv.setName(varName);
            // TODO:
            // Check if variables have real descriptive labels defined,
            // via the mechanism provided by that special optional package...
            // (?) -- L.A.
            // Until then the variable name doubles as its label.
            dv.setLabel(varName);
            dv.setInvalidRanges(new ArrayList<>());
            dv.setSummaryStatistics(new ArrayList<>());
            // NOTE(review): looks like a placeholder UNF, presumably replaced
            // later in the ingest process - confirm.
            dv.setUnf("UNF:6:XYZXYZXYZ");
            dv.setCategories(new ArrayList<>());
            // The file order is the zero-based position of the column.
            dv.setFileOrder(variableList.size());
            dv.setDataTable(dataTable);
            variableList.add(dv);
        }
        dataTable.setVarQuantity(Long.valueOf(variableList.size()));
        dataTable.setDataVariables(variableList);

        // Fill in type, interval, format and categories of every variable.
        processVariableInfo(metaInfo, dataTable);

        // The case quantity is optional; it is only stored when it is a
        // positive integer.
        if (fileInformation.at("caseQnty") != null) {
            int caseQuantity = 0;
            try {
                caseQuantity = fileInformation.at("caseQnty").asInteger();
            } catch (REXPMismatchException rexp) {
                // bummer! - but not fatal.
                LOG.fine("RDATAFileReader: caseQnty could not be read as an integer; case quantity left unset");
            }
            if (caseQuantity > 0) {
                dataTable.setCaseQuantity(Long.valueOf(caseQuantity));
            }
        }
    } catch (REXPMismatchException ex) {
        LOG.log(java.util.logging.Level.WARNING, "RDATAFileReader: Could not put information correctly", ex);
    } catch (Exception ex) {
        // Boundary catch: a failure here must not abort the caller, but the
        // full stack trace is kept in the log.
        LOG.log(java.util.logging.Level.WARNING, "RDATAFileReader: Could not extract data frame information", ex);
    }
}
use of org.rosuda.REngine.RList in project rsession by yannrichet.
the class RserveSession method asList.
/**
 * Converts an R list into a Java map keyed by the list's element names.
 *
 * A null argument and an R NULL both yield null; an argument that already
 * is a Map is returned unchanged. Every list element is converted with
 * cast(...) before being stored.
 *
 * @param o a Map, an REXP holding a list, or null
 * @return the converted map, or null for null / R NULL input
 * @throws IllegalArgumentException if o is neither a Map nor an REXP
 * @throws ClassCastException if the REXP cannot be read as a list, or if
 *         it is a non-empty list without names
 */
@Override
public Map asList(Object o) throws ClassCastException {
    if (o == null) {
        return null;
    }
    if (o instanceof Map) {
        return (Map) o;
    }
    if (!(o instanceof REXP)) {
        throw new IllegalArgumentException("[asList] Not an REXP object: " + o);
    }
    if (((REXP) o).isNull()) {
        return null;
    }
    try {
        RList l = ((REXP) o).asList();
        Map<String, Object> m = new HashMap<String, Object>(l.size());
        // RList.keys() returns null for a list without names; iterating it
        // directly would fail with a NullPointerException.
        String[] keys = l.keys();
        if (keys == null) {
            if (l.size() == 0) {
                return m;
            }
            throw new ClassCastException("[asList] Cannot cast unnamed list to map " + o);
        }
        for (String k : keys) {
            m.put(k, cast(l.at(k)));
        }
        return m;
    } catch (REXPMismatchException ex) {
        // ClassCastException has no cause-taking constructor.
        ClassCastException cce = new ClassCastException("[asList] Cannot cast to list " + o);
        cce.initCause(ex);
        throw cce;
    }
}
use of org.rosuda.REngine.RList in project rsession by yannrichet.
the class RserveSession method set.
/**
 * Assigns a numeric matrix to an R variable as a data.frame.
 *
 * The columns are first packed into an RList via buildRList(data, names);
 * the assignment itself runs while holding the lock on R.
 *
 * @param varname name of the R variable to assign
 * @param data numeric data, one inner array per row
 * @param names names of columns
 * @return true when the assignment went through, false when the list could
 *         not be turned into a data.frame or the Rserve call failed
 */
@Override
public boolean set(String varname, double[][] data, String... names) {
    final RList frameColumns = buildRList(data, names);
    log(HEAD_SET + varname + " <- " + frameColumns, Level.INFO);

    boolean assigned;
    try {
        synchronized (R) {
            R.assign(varname, REXP.createDataFrame(frameColumns));
        }
        assigned = true;
    } catch (REXPMismatchException mismatch) {
        mismatch.printStackTrace();
        log(HEAD_ERROR + " RList " + frameColumns.toString() + " not convertible as dataframe.", Level.ERROR);
        assigned = false;
    } catch (RserveException failure) {
        log(HEAD_EXCEPTION + failure.getMessage() + "\n set(String varname=" + varname + ",double[][] data, String... names)", Level.ERROR);
        assigned = false;
    }
    return assigned;
}
use of org.rosuda.REngine.RList in project dataverse by IQSS.
the class RDATAFileReader method processVariableInfo.
/**
 * Applies the per-column meta-data returned by R to the variables of a
 * data table.
 *
 * Entry k of metaInfo is applied to the k-th DataVariable of dataTable.
 * The "type.string" entry selects the treatment:
 * <ul>
 * <li>missing/NULL, "character" or "other": character, discrete</li>
 * <li>"integer": numeric, discrete</li>
 * <li>"numeric" or "double": numeric, continuous</li>
 * <li>anything starting with "Date": character, discrete, with the R
 *     format stored; the format category is "date" for "Date" and "time"
 *     for "DateTime"</li>
 * <li>"factor": character, discrete, one VariableCategory per level;
 *     marked ordered when the format is "ordered"</li>
 * <li>"logical": numeric, discrete, format category "Boolean", with the
 *     categories 0/FALSE and 1/TRUE</li>
 * </ul>
 * Any other type name leaves the variable untouched. A column whose
 * meta-data cannot be read is logged and skipped.
 *
 * @param metaInfo an "RList" Object containing indices - type, type.string,
 * class, levels, and format.
 * @param dataTable a dataverse DataTable object
 * @throws IOException declared in the signature; this method body contains
 *         no explicit throw of it
 */
private void processVariableInfo(RList metaInfo, DataTable dataTable) throws IOException {
    // list(type = 1, type.string = "integer", class = class(values), levels = NULL, format = NULL)
    for (int k = 0; k < metaInfo.size(); k++) {
        try {
            // Meta-data for a column in the data-set
            RList columnMeta = metaInfo.at(k).asList();

            // Extract information from the returned list; an entry that is
            // absent is treated the same way as an R NULL.
            Integer variableType = hasMetaValue(columnMeta, "type") ? columnMeta.at("type").asInteger() : null;
            String variableTypeName = hasMetaValue(columnMeta, "type.string") ? columnMeta.at("type.string").asString() : null;
            String[] variableLevels = hasMetaValue(columnMeta, "levels") ? columnMeta.at("levels").asStrings() : new String[0];
            String variableFormat = hasMetaValue(columnMeta, "format") ? columnMeta.at("format").asString() : null;

            LOG.fine("variable type: " + variableType);
            LOG.fine("variable type name: " + variableTypeName);
            LOG.fine("variable format: " + variableFormat);
            for (String variableLevel : variableLevels) {
                LOG.fine("variable level: " + variableLevel);
            }

            if (variableTypeName == null || variableTypeName.equals("character") || variableTypeName.equals("other")) {
                // This is a String:
                dataTable.getDataVariables().get(k).setTypeCharacter();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
            } else if (variableTypeName.equals("integer")) {
                dataTable.getDataVariables().get(k).setTypeNumeric();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
            } else if (variableTypeName.equals("numeric") || variableTypeName.equals("double")) {
                dataTable.getDataVariables().get(k).setTypeNumeric();
                dataTable.getDataVariables().get(k).setIntervalContinuous();
            } else if (variableTypeName.startsWith("Date")) {
                // Dates are stored as strings; the R format is kept so the
                // values can be interpreted later.
                dataTable.getDataVariables().get(k).setTypeCharacter();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
                dataTable.getDataVariables().get(k).setFormat(variableFormat);
                if (variableTypeName.equals("Date")) {
                    dataTable.getDataVariables().get(k).setFormatCategory("date");
                } else if (variableTypeName.equals("DateTime")) {
                    dataTable.getDataVariables().get(k).setFormatCategory("time");
                }
            } else if (variableTypeName.equals("factor")) {
                // All R factors are *string* factors!
                dataTable.getDataVariables().get(k).setTypeCharacter();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
                if (variableLevels != null && variableLevels.length > 0) {
                    // yes, this is a factor, with levels defined.
                    LOG.fine("this is a factor.");
                    boolean ordered = "ordered".equals(variableFormat);
                    if (ordered) {
                        LOG.fine("an ordered factor, too");
                    }
                    for (int i = 0; i < variableLevels.length; i++) {
                        VariableCategory cat = new VariableCategory();
                        cat.setValue(variableLevels[i]);
                        // Sadly, R factors don't have descriptive labels;
                        cat.setLabel(variableLevels[i]);
                        if (ordered) {
                            // Category order is one-based.
                            cat.setOrder(i + 1);
                        }
                        /* cross-link the variable and category to each other: */
                        cat.setDataVariable(dataTable.getDataVariables().get(k));
                        dataTable.getDataVariables().get(k).getCategories().add(cat);
                    }
                    dataTable.getDataVariables().get(k).setOrderedCategorical(ordered);
                }
            } else if ("logical".equals(variableTypeName)) {
                // Logical vectors become numeric 0/1 variables with two
                // fixed categories.
                dataTable.getDataVariables().get(k).setFormatCategory("Boolean");
                dataTable.getDataVariables().get(k).setTypeNumeric();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
                String[] booleanFactorLabels = {"FALSE", "TRUE"};
                String[] booleanFactorValues = {"0", "1"};
                for (int i = 0; i < 2; i++) {
                    VariableCategory cat = new VariableCategory();
                    cat.setValue(booleanFactorValues[i]);
                    cat.setLabel(booleanFactorLabels[i]);
                    /* cross-link the variable and category to each other: */
                    cat.setDataVariable(dataTable.getDataVariables().get(k));
                    dataTable.getDataVariables().get(k).getCategories().add(cat);
                }
            }
        } catch (REXPMismatchException ex) {
            // If something went wrong, then it wasn't meant to be for that column.
            // And you know what? That's okay.
            LOG.log(java.util.logging.Level.FINE, String.format("Could not process variable %d of the data frame.", k), ex);
        }
    }
}

/** Returns true when list has an entry named key whose value is not R NULL. */
private static boolean hasMetaValue(RList list, String key) {
    return list.at(key) != null && !list.at(key).isNull();
}
use of org.rosuda.REngine.RList in project rsession by yannrichet.
the class RserveSession method buildRList.
/**
 * Build an R list of numeric columns from a row-major matrix.
 *
 * Column i of the result holds data[j][i] for every row j; a row that is
 * too short to have an i-th value contributes NaN. When data is null, each
 * named column is an empty numeric vector. When data is non-null, names
 * must be non-null as well, since its length determines the number of
 * columns.
 *
 * @param data numeric data (eg matrix), one inner array per row; may be null
 * @param names names of columns
 * @return RList object, or null when both data and names are null
 */
public static RList buildRList(double[][] data, String... names) {
    if (data == null) {
        if (names == null) {
            return null;
        }
        REXP[] nulls = new REXP[names.length];
        for (int i = 0; i < nulls.length; i++) {
            nulls[i] = new REXPDouble(new double[0]);
        }
        return new RList(nulls, names);
    }
    // A matrix without rows has no first row to compare against; it is
    // valid input and yields zero-length columns below.
    assert data.length == 0 || data[0].length == names.length : "Cannot build R list from " + Arrays.deepToString(data) + " & " + Arrays.toString(names);
    REXP[] vals = new REXP[names.length];
    for (int i = 0; i < names.length; i++) {
        double[] coli = new double[data.length];
        for (int j = 0; j < coli.length; j++) {
            if (data[j].length > i) {
                coli[j] = data[j][i];
            } else {
                // Ragged row: pad the missing cell.
                coli[j] = Double.NaN;
            }
        }
        vals[i] = new REXPDouble(coli);
    }
    return new RList(vals, names);
}
Aggregations