Use of edu.harvard.iq.dataverse.datavariable.VariableCategory in project dataverse by IQSS.
Class RDATAFileReader, method processVariableInfo.
/**
 * Applies the per-column metadata reported by R to the variables of a data table.
 *
 * <p>Entry {@code k} of {@code metaInfo} is read as a list with the fields
 * {@code type}, {@code type.string}, {@code levels} and {@code format}, and is
 * applied to the variable at index {@code k} of
 * {@code dataTable.getDataVariables()}, keyed on the {@code type.string} value:
 * <ul>
 * <li>absent, {@code "character"} or {@code "other"}: character type, discrete interval;</li>
 * <li>{@code "integer"}: numeric type, discrete interval;</li>
 * <li>{@code "numeric"} or {@code "double"}: numeric type, continuous interval;</li>
 * <li>names starting with {@code "Date"}: character type, discrete interval, the
 * reported format, and the format category {@code "date"} (for {@code "Date"}) or
 * {@code "time"} (for {@code "DateTime"});</li>
 * <li>{@code "factor"}: character type, discrete interval, and one category per
 * level (ordered when the reported format is {@code "ordered"});</li>
 * <li>{@code "logical"}: format category {@code "Boolean"}, numeric type, discrete
 * interval, and the two categories {@code 0/FALSE} and {@code 1/TRUE}.</li>
 * </ul>
 * Any other type name leaves the variable untouched. A column whose metadata
 * cannot be converted (an {@code REXPMismatchException}) is logged and skipped;
 * the remaining columns are still processed.
 *
 * @param metaInfo an "RList" object with one entry per column, each holding the
 * indices type, type.string, class, levels, and format
 * @param dataTable a dataverse DataTable object whose variables are updated in place
 * @throws IOException declared by the signature; no statement in this method
 * body throws it explicitly
 */
private void processVariableInfo(RList metaInfo, DataTable dataTable) throws IOException {
    // Each entry is expected to look like:
    // list(type = 1, type.string = "integer", class = class(values), levels = NULL, format = NULL)
    for (int k = 0; k < metaInfo.size(); k++) {
        try {
            // Meta-data for a column in the data-set
            RList columnMeta = metaInfo.at(k).asList();

            // Extract information from the returned list; absent or NULL fields
            // fall back to null (or to an empty array for the levels).
            Integer variableType = hasMetaValue(columnMeta, "type")
                    ? columnMeta.at("type").asInteger() : null;
            String variableTypeName = hasMetaValue(columnMeta, "type.string")
                    ? columnMeta.at("type.string").asString() : null;
            String[] variableLevels = hasMetaValue(columnMeta, "levels")
                    ? columnMeta.at("levels").asStrings() : new String[0];
            String variableFormat = hasMetaValue(columnMeta, "format")
                    ? columnMeta.at("format").asString() : null;

            LOG.fine("variable type: " + variableType);
            LOG.fine("variable type name: " + variableTypeName);
            LOG.fine("variable format: " + variableFormat);
            for (String variableLevel : variableLevels) {
                LOG.fine("variable level: " + variableLevel);
            }

            if (variableTypeName == null || variableTypeName.equals("character") || variableTypeName.equals("other")) {
                // This is a String:
                dataTable.getDataVariables().get(k).setTypeCharacter();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
            } else if (variableTypeName.equals("integer")) {
                dataTable.getDataVariables().get(k).setTypeNumeric();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
            } else if (variableTypeName.equals("numeric") || variableTypeName.equals("double")) {
                dataTable.getDataVariables().get(k).setTypeNumeric();
                dataTable.getDataVariables().get(k).setIntervalContinuous();
            } else if (variableTypeName.startsWith("Date")) {
                // Dates and date-times are stored as formatted strings.
                dataTable.getDataVariables().get(k).setTypeCharacter();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
                dataTable.getDataVariables().get(k).setFormat(variableFormat);
                if (variableTypeName.equals("Date")) {
                    dataTable.getDataVariables().get(k).setFormatCategory("date");
                } else if (variableTypeName.equals("DateTime")) {
                    dataTable.getDataVariables().get(k).setFormatCategory("time");
                }
            } else if (variableTypeName.equals("factor")) {
                // All R factors are *string* factors!
                dataTable.getDataVariables().get(k).setTypeCharacter();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
                if (variableLevels.length > 0) {
                    // yes, this is a factor, with levels defined.
                    LOG.fine("this is a factor.");
                    boolean ordered = "ordered".equals(variableFormat);
                    if (ordered) {
                        LOG.fine("an ordered factor, too");
                    }
                    for (int i = 0; i < variableLevels.length; i++) {
                        // Sadly, R factors don't have descriptive labels,
                        // so the level doubles as both value and label.
                        attachCategory(dataTable, k, variableLevels[i], variableLevels[i],
                                ordered ? Integer.valueOf(i + 1) : null);
                    }
                    dataTable.getDataVariables().get(k).setOrderedCategorical(ordered);
                }
            } else if ("logical".equals(variableTypeName)) {
                // Logical columns become numeric 0/1 variables with FALSE/TRUE labels.
                dataTable.getDataVariables().get(k).setFormatCategory("Boolean");
                dataTable.getDataVariables().get(k).setTypeNumeric();
                dataTable.getDataVariables().get(k).setIntervalDiscrete();
                attachCategory(dataTable, k, "0", "FALSE", null);
                attachCategory(dataTable, k, "1", "TRUE", null);
            }
        } catch (REXPMismatchException ex) {
            // Deliberately best-effort: a column whose metadata cannot be read is
            // skipped. The exception goes through the logger (with its stack
            // trace) instead of being printed straight to stderr.
            LOG.log(java.util.logging.Level.WARNING,
                    String.format("Could not process variable %d of the data frame.", k), ex);
        }
    }
}

/**
 * Tells whether a column's metadata list holds a usable value under a key.
 *
 * @param columnMeta the metadata list of one column
 * @param key the field name to look up
 * @return {@code true} when the lookup yields a non-null reference that is not
 * an R NULL; {@code false} otherwise (this also guards against the lookup
 * returning {@code null}, e.g. for a key that is not in the list)
 */
private boolean hasMetaValue(RList columnMeta, String key) {
    return columnMeta.at(key) != null && !columnMeta.at(key).isNull();
}

/**
 * Creates a category and cross-links it with the variable at index {@code k}.
 *
 * @param dataTable the table holding the variable
 * @param k index of the variable in {@code dataTable.getDataVariables()}
 * @param value the category value
 * @param label the category label
 * @param order the order to set on the category, or {@code null} to leave it unset
 */
private void attachCategory(DataTable dataTable, int k, String value, String label, Integer order) {
    VariableCategory category = new VariableCategory();
    category.setValue(value);
    category.setLabel(label);
    if (order != null) {
        // Set before the category is added to the variable, as the original code did.
        category.setOrder(order);
    }
    /* cross-link the variable and category to each other: */
    category.setDataVariable(dataTable.getDataVariables().get(k));
    dataTable.getDataVariables().get(k).getCategories().add(category);
}
Aggregations