Search in sources :

Example 1 with RList

use of org.rosuda.REngine.RList in project dataverse by IQSS.

the class RDATAFileReader method getDataFrameInformation.

/**
 * Runs an R script that extracts meta-data from the *original* Rdata
 * object, then parses its output and creates DataVariable objects.
 *
 * <p>The script loads the Rdata file, sets R's working directory to the
 * file's parent directory and then runs the dataset/info scripts. From the
 * list it returns, this method reads "meta.info", "varNames" and (if
 * present) "caseQnty", creates one DataVariable per variable name, and
 * stores the variables, the variable count and the case count on
 * {@code dataTable}.
 *
 * <p>Exceptions raised while evaluating the script or parsing its result
 * are caught and logged here rather than propagated to the caller.
 */
private void getDataFrameInformation() {
    LOG.fine("RDATAFileReader: Entering `getDataFrameInformation` function");
    String parentDirectory = mRWorkspace.getRdataFile().getParent();
    String fileInfoScript = new StringBuilder()
            .append(String.format("load(\"%s\")\n", mRWorkspace.getRdataAbsolutePath()))
            .append(String.format("setwd(\"%s\")\n", parentDirectory))
            .append(RSCRIPT_GET_DATASET)
            .append("\n")
            .append(RSCRIPT_DATASET_INFO_SCRIPT)
            .toString();
    try {
        RRequest request = mRequestBuilder.build();
        request.script(fileInfoScript);
        RList fileInformation = request.eval().asList();
        RList metaInfo = fileInformation.at("meta.info").asList();
        int varQnty = 0;
        // Store variable names
        String[] variableNames = fileInformation.at("varNames").asStrings();
        // mDataTypes = fileInformation.at("dataTypes").asStrings();
        // Initialize variables:
        List<DataVariable> variableList = new ArrayList<>();
        for (String varName : variableNames) {
            DataVariable dv = new DataVariable();
            dv.setName(varName);
            dv.setLabel(varName);
            // TODO:
            // Check if variables have real descriptive labels defined,
            // via the mechanism provided by that special optional package...
            // (?) -- L.A.
            dv.setInvalidRanges(new ArrayList<>());
            dv.setSummaryStatistics(new ArrayList<>());
            dv.setUnf("UNF:6:XYZXYZXYZ");
            dv.setCategories(new ArrayList<>());
            variableList.add(dv);
            // File order is the zero-based position of the variable in varNames.
            dv.setFileOrder(varQnty);
            dv.setDataTable(dataTable);
            // variableLabels.put(varName, varName);
            // variableNameList.add(varName);
            varQnty++;
        }
        // Long.valueOf replaces the deprecated Long(long) boxing constructor.
        dataTable.setVarQuantity(Long.valueOf(varQnty));
        dataTable.setDataVariables(variableList);
        // Get the Variable Meta Data Table while Populating
        processVariableInfo(metaInfo, dataTable);
        if (fileInformation.at("caseQnty") != null) {
            int caseQuantity = 0;
            try {
                caseQuantity = fileInformation.at("caseQnty").asInteger();
            } catch (REXPMismatchException rexp) {
                // bummer! - but not fatal: the case quantity is simply left unset.
                LOG.fine("RDATAFileReader: caseQnty could not be read as an integer: " + rexp.getMessage());
            }
            // Only a strictly positive count is recorded.
            if (caseQuantity > 0) {
                dataTable.setCaseQuantity(Long.valueOf(caseQuantity));
            }
        }
    } catch (REXPMismatchException ex) {
        LOG.warning("RDATAFileReader: Could not put information correctly: " + ex.getMessage());
    } catch (Exception ex) {
        // Catch-all so that any other failure in the block above is reported, not rethrown.
        ex.printStackTrace();
        LOG.warning(ex.getMessage());
    }
}
Also used : REXPMismatchException(org.rosuda.REngine.REXPMismatchException) DataVariable(edu.harvard.iq.dataverse.datavariable.DataVariable) NamingException(javax.naming.NamingException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) REXPMismatchException(org.rosuda.REngine.REXPMismatchException) RList(org.rosuda.REngine.RList)

Example 2 with RList

use of org.rosuda.REngine.RList in project rsession by yannrichet.

the class RserveSession method asList.

/**
 * Converts an R list into a Java map from element name to converted value.
 *
 * <p>A {@code null} argument or an R NULL yields {@code null}; an argument
 * that is already a {@link Map} is returned as is. Otherwise each element
 * of the R list is converted with {@code cast(...)} and stored under its key.
 *
 * @param o object to convert: null, a Map, or an REXP
 * @return the resulting map, or null for a null / R NULL input
 * @throws IllegalArgumentException if o is neither null, a Map nor an REXP
 * @throws ClassCastException if the REXP cannot be viewed as a list
 */
@Override
public Map asList(Object o) throws ClassCastException {
    if (o == null) {
        return null;
    }
    if (o instanceof Map) {
        return (Map) o;
    }
    if (!(o instanceof REXP)) {
        throw new IllegalArgumentException("[asList] Not an REXP object: " + o);
    }
    REXP expression = (REXP) o;
    if (expression.isNull()) {
        return null;
    }
    try {
        RList l = expression.asList();
        Map<String, Object> m = new HashMap<String, Object>(l.size());
        for (String k : l.keys()) {
            m.put(k, cast(l.at(k)));
        }
        return m;
    } catch (REXPMismatchException ex) {
        // ClassCastException has no cause-taking constructor, so attach the cause explicitly.
        ClassCastException failure = new ClassCastException("[asList] Cannot cast to list " + o);
        failure.initCause(ex);
        throw failure;
    }
}
Also used : HashMap(java.util.HashMap) RList(org.rosuda.REngine.RList) REXPMismatchException(org.rosuda.REngine.REXPMismatchException) REXPString(org.rosuda.REngine.REXPString) HashMap(java.util.HashMap) Map(java.util.Map) REXP(org.rosuda.REngine.REXP)

Example 3 with RList

use of org.rosuda.REngine.RList in project rsession by yannrichet.

the class RserveSession method set.

/**
 * Set R data.frame in R env.
 *
 * <p>Builds an RList from {@code data} and {@code names} via
 * {@code buildRList}, wraps it as a data frame and assigns it to
 * {@code varname} while holding the lock on {@code R}. Failures are logged
 * and reported through the return value rather than thrown.
 *
 * @param varname name of the R variable to assign
 * @param data numeric data, passed through to buildRList
 * @param names names of columns
 * @return true if the assignment succeeded, false if the list could not be
 * converted to a data frame or the assignment failed
 */
@Override
public boolean set(String varname, double[][] data, String... names) {
    RList list = buildRList(data, names);
    log(HEAD_SET + varname + " <- " + list, Level.INFO);
    try {
        synchronized (R) {
            R.assign(varname, REXP.createDataFrame(list));
        }
    } catch (REXPMismatchException re) {
        re.printStackTrace();
        // buildRList returns null when both data and names are null, so use
        // null-safe concatenation here instead of calling list.toString().
        log(HEAD_ERROR + " RList " + list + " not convertible as dataframe.", Level.ERROR);
        return false;
    } catch (RserveException ex) {
        log(HEAD_EXCEPTION + ex.getMessage() + "\n  set(String varname=" + varname + ",double[][] data, String... names)", Level.ERROR);
        return false;
    }
    return true;
}
Also used : RList(org.rosuda.REngine.RList) REXPMismatchException(org.rosuda.REngine.REXPMismatchException) RserveException(org.rosuda.REngine.Rserve.RserveException)

Example 4 with RList

use of org.rosuda.REngine.RList in project dataverse by IQSS.

the class RDATAFileReader method processVariableInfo.

/**
 * Applies the per-column meta-data returned by the R script to the data
 * variables of the given table. Nothing is returned; the variables obtained
 * from {@code dataTable.getDataVariables()} are updated in place.
 *
 * For each entry k of metaInfo the entries "type", "type.string", "levels"
 * and "format" are read, and the k-th data variable is configured from the
 * type name:
 *   - null, "character", "other": character, discrete
 *   - "integer": numeric, discrete
 *   - "numeric", "double": numeric, continuous
 *   - names starting with "Date": character, discrete, with format and
 *     (for "Date" / "DateTime") a "date" / "time" format category
 *   - "factor": character, discrete, one category per level
 *   - "logical": numeric, discrete, "Boolean" format category, with
 *     categories 0/FALSE and 1/TRUE
 * A column whose meta-data raises REXPMismatchException is logged and skipped.
 *
 * @param metaInfo an "RList" Object containing indices - type, type.string,
 * class, levels, and format.
 * @param dataTable a dataverse DataTable object
 * @throws IOException declared by the signature. NOTE(review): no statement
 * in this method throws it directly - confirm whether it is still needed.
 */
private void processVariableInfo(RList metaInfo, DataTable dataTable) throws IOException {
    // list(type = 1, type.string = "integer", class = class(values), levels = NULL, format = NULL)
    Integer variableType = -1;
    String variableTypeName = "", variableFormat = "";
    String[] variableLevels = null;
    // Entry k of metaInfo is applied to the data variable at index k.
    for (int k = 0; k < metaInfo.size(); k++) {
        try {
            // Meta-data for a column in the data-set
            RList columnMeta = metaInfo.at(k).asList();
            // Extract information from the returned list; an R NULL entry
            // becomes null (or an empty array for the levels).
            variableType = !columnMeta.at("type").isNull() ? columnMeta.at("type").asInteger() : null;
            variableTypeName = !columnMeta.at("type.string").isNull() ? columnMeta.at("type.string").asString() : null;
            variableLevels = !columnMeta.at("levels").isNull() ? columnMeta.at("levels").asStrings() : new String[0];
            variableFormat = !columnMeta.at("format").isNull() ? columnMeta.at("format").asString() : null;
            LOG.fine("variable type: " + variableType);
            LOG.fine("variable type name: " + variableTypeName);
            LOG.fine("variable format: " + variableFormat);
            for (String variableLevel : variableLevels) {
                LOG.fine("variable level: " + variableLevel);
            }
            if (variableTypeName == null || variableTypeName.equals("character") || variableTypeName.equals("other")) {
                // This is a String (also the fallback when no type name was reported):
                dataTable.getDataVariables().get(k).setTypeCharacter();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
            } else if (variableTypeName.equals("integer")) {
                dataTable.getDataVariables().get(k).setTypeNumeric();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
            } else if (variableTypeName.equals("numeric") || variableTypeName.equals("double")) {
                dataTable.getDataVariables().get(k).setTypeNumeric();
                dataTable.getDataVariables().get(k).setIntervalContinuous();
            } else if (variableTypeName.startsWith("Date")) {
                // Dates and date-times are stored as character data plus a format.
                dataTable.getDataVariables().get(k).setTypeCharacter();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
                dataTable.getDataVariables().get(k).setFormat(variableFormat);
                // Set the format category from the exact type name; any other
                // "Date..." name gets no format category.
                if (variableTypeName.equals("Date")) {
                    dataTable.getDataVariables().get(k).setFormatCategory("date");
                } else if (variableTypeName.equals("DateTime")) {
                    dataTable.getDataVariables().get(k).setFormatCategory("time");
                }
            } else if (variableTypeName.equals("factor")) {
                // All R factors are *string* factors!
                dataTable.getDataVariables().get(k).setTypeCharacter();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
                // variableLevels is never null here (it defaults to an empty
                // array above), so the length check is the effective test.
                if (variableLevels != null && variableLevels.length > 0) {
                    // yes, this is a factor, with levels defined.
                    LOG.fine("this is a factor.");
                    boolean ordered = false;
                    if (variableFormat != null && variableFormat.equals("ordered")) {
                        LOG.fine("an ordered factor, too");
                        ordered = true;
                    }
                    for (int i = 0; i < variableLevels.length; i++) {
                        VariableCategory cat = new VariableCategory();
                        cat.setValue(variableLevels[i]);
                        // Sadly, R factors don't have descriptive labels;
                        cat.setLabel(variableLevels[i]);
                        if (ordered) {
                            // Order is 1-based, following the order of the levels.
                            cat.setOrder(i + 1);
                        }
                        /* cross-link the variable and category to each other: */
                        cat.setDataVariable(dataTable.getDataVariables().get(k));
                        dataTable.getDataVariables().get(k).getCategories().add(cat);
                    }
                    dataTable.getDataVariables().get(k).setOrderedCategorical(ordered);
                }
            } else // Logical columns become numeric 0/1 variables with FALSE/TRUE category labels.
            if ("logical".equals(variableTypeName)) {
                dataTable.getDataVariables().get(k).setFormatCategory("Boolean");
                dataTable.getDataVariables().get(k).setTypeNumeric();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
                String[] booleanFactorLabels = new String[2];
                booleanFactorLabels[0] = "FALSE";
                booleanFactorLabels[1] = "TRUE";
                String[] booleanFactorValues = new String[2];
                booleanFactorValues[0] = "0";
                booleanFactorValues[1] = "1";
                for (int i = 0; i < 2; i++) {
                    VariableCategory cat = new VariableCategory();
                    cat.setValue(booleanFactorValues[i]);
                    // The value is the numeric code ("0"/"1"), the label the R literal ("FALSE"/"TRUE").
                    cat.setLabel(booleanFactorLabels[i]);
                    /* cross-link the variable and category to each other: */
                    cat.setDataVariable(dataTable.getDataVariables().get(k));
                    dataTable.getDataVariables().get(k).getCategories().add(cat);
                }
            }
        // Type names not matched by any branch above leave the variable untouched.
        } catch (REXPMismatchException ex) {
            // If something went wrong, then it wasn't meant to be for that column.
            // And you know what? That's okay.
            ex.printStackTrace();
            LOG.fine(String.format("Could not process variable %d of the data frame.", k));
        }
    }
}
Also used : VariableCategory(edu.harvard.iq.dataverse.datavariable.VariableCategory) RList(org.rosuda.REngine.RList) REXPMismatchException(org.rosuda.REngine.REXPMismatchException)

Example 5 with RList

use of org.rosuda.REngine.RList in project rsession by yannrichet.

the class RserveSession method buildRList.

/**
 * Build an R list with one numeric column per name from row-major data.
 *
 * <p>Column {@code i} of the result holds {@code data[j][i]} for every row
 * {@code j}. Rows that are null or too short to contain column {@code i}
 * contribute {@code NaN}. When {@code data} is null, every column is an
 * empty numeric vector.
 *
 * @param data numeric data (eg matrix), indexed as data[row][column]
 * @param names names of columns
 * @return RList object, or null when both data and names are null
 */
public static RList buildRList(double[][] data, String... names) {
    if (data == null) {
        if (names == null) {
            return null;
        }
        REXP[] nulls = new REXP[names.length];
        for (int i = 0; i < nulls.length; i++) {
            nulls[i] = new REXPDouble(new double[0]);
        }
        return new RList(nulls, names);
    }
    // Only inspect the first row when there is one: an empty matrix must not
    // fail with ArrayIndexOutOfBoundsException when assertions are enabled.
    assert data.length == 0 || data[0] == null || data[0].length == names.length : "Cannot build R list from " + Arrays.deepToString(data) + " & " + Arrays.toString(names);
    REXP[] vals = new REXP[names.length];
    for (int i = 0; i < names.length; i++) {
        double[] coli = new double[data.length];
        for (int j = 0; j < coli.length; j++) {
            double[] row = data[j];
            // Ragged (or missing) rows are padded with NaN.
            coli[j] = (row != null && row.length > i) ? row[i] : Double.NaN;
        }
        vals[i] = new REXPDouble(coli);
    }
    return new RList(vals, names);
}
Also used : REXPDouble(org.rosuda.REngine.REXPDouble) RList(org.rosuda.REngine.RList) REXP(org.rosuda.REngine.REXP)

Aggregations

RList (org.rosuda.REngine.RList)5 REXPMismatchException (org.rosuda.REngine.REXPMismatchException)4 REXP (org.rosuda.REngine.REXP)2 DataVariable (edu.harvard.iq.dataverse.datavariable.DataVariable)1 VariableCategory (edu.harvard.iq.dataverse.datavariable.VariableCategory)1 NoSuchAlgorithmException (java.security.NoSuchAlgorithmException)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 NamingException (javax.naming.NamingException)1 REXPDouble (org.rosuda.REngine.REXPDouble)1 REXPString (org.rosuda.REngine.REXPString)1 RserveException (org.rosuda.REngine.Rserve.RserveException)1