use of edu.cmu.tetrad.graph.Node in project tetrad by cmu-phil.
the class TimeSeriesSemSimulation method topToBottomLayout.
/**
 * Lays out the nodes of a time lag graph on a grid, one column per lag-0
 * node and one row per lag.
 *
 * <p>The lag-0 nodes are first sorted by their current center X, so the
 * left-to-right order of the columns follows the existing layout. Columns
 * start at x = 65 and are spaced 100 apart. Within each column the node at
 * the maximum lag is placed at y = 50 and each smaller lag is placed 100
 * further down, so the lag-0 node ends up at the bottom.
 *
 * @param graph The time lag graph whose nodes are repositioned in place.
 */
public static void topToBottomLayout(TimeLagGraph graph) {
    int xStart = 65;
    int yStart = 50;
    int xSpace = 100;
    int ySpace = 100;

    List<Node> lag0Nodes = graph.getLag0Nodes();

    Collections.sort(lag0Nodes, new Comparator<Node>() {
        @Override
        public int compare(Node o1, Node o2) {
            // Integer.compare avoids the overflow that plain subtraction
            // can produce for coordinates of large magnitude.
            return Integer.compare(o1.getCenterX(), o2.getCenterX());
        }
    });

    int x = xStart - xSpace;

    for (Node node : lag0Nodes) {
        x += xSpace;
        int y = yStart - ySpace;
        TimeLagGraph.NodeId id = graph.getNodeId(node);

        for (int lag = graph.getMaxLag(); lag >= 0; lag--) {
            // The row advances even when the node is missing, so the
            // remaining lags keep their grid positions.
            y += ySpace;
            Node _node = graph.getNode(id.getName(), lag);

            if (_node == null) {
                // Report what was looked up; the node itself is null here.
                System.out.println("Couldn't find " + id.getName() + " at lag " + lag);
                continue;
            }

            _node.setCenterX(x);
            _node.setCenterY(y);
        }
    }
}
use of edu.cmu.tetrad.graph.Node in project tetrad by cmu-phil.
the class DataReader method scanForDescription.
/**
 * Scans the file for variable definitions and number of cases.
 *
 * <p>Every row is tokenized and the distinct non-empty, non-missing tokens
 * seen in each column are collected. Each column is then turned into a
 * variable: a matching entry of <code>knownVariables</code> if one has the
 * same name, otherwise a continuous or discrete variable chosen from the
 * collected tokens. Scanning stops at the first line that starts with
 * "/knowledge".
 *
 * @param varNames Names of the variables, one per column. Names containing
 * spaces are rewritten in place with underscores for the columns that pass
 * through the name check. NOTE(review): earlier documentation said this may
 * be null, but the method dereferences it immediately -- confirm callers
 * never pass null.
 * @param lineizer Parses lines, skipping comments.
 * @param delimiter Delimiter to tokenize tokens in each row.
 * @param firstLine Non-null if a non-variable first line had to be lineized
 * @param idIndex The index of the ID column.
 * @param variableSectionIncluded Passed through unchanged to the returned
 * description.
 * @return A description holding the variables, the number of data rows, the
 * ID column index, the variable-section flag and the delimiter.
 * @throws IllegalArgumentException If a column name is not a legal variable
 * name after spaces have been replaced.
 */
private DataSetDescription scanForDescription(List<String> varNames, Lineizer lineizer, Pattern delimiter, String firstLine, int idIndex, boolean variableSectionIncluded) {
    // Scan file, collecting up the set of range values for each variable.
    List<Set<String>> dataStrings = new ArrayList<>();

    for (int i = 0; i < varNames.size(); i++) {
        dataStrings.add(new HashSet<String>(varNames.size()));
    }

    int row = -1;

    // NOTE(review): a non-null firstLine is only consumed while the lineizer
    // still has lines, so it is dropped when it is the only data row --
    // confirm whether that is intended before changing the loop condition.
    while (lineizer.hasMoreLines()) {
        String line;

        if (firstLine == null) {
            line = lineizer.nextLine();
        } else {
            line = firstLine;
            firstLine = null;
        }

        if (line.startsWith("/knowledge")) {
            break;
        }

        ++row;

        RegexTokenizer tokenizer = new RegexTokenizer(line, delimiter, quoteChar);
        int col = -1;

        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            ++col;

            // Surplus tokens are still counted (for the log message below)
            // but not recorded.
            if (col >= dataStrings.size()) {
                continue;
            }

            if ("".equals(token) || missingValueMarker.equals(token)) {
                continue;
            }

            dataStrings.get(col).add(token);
        }

        if (col < varNames.size() - 1) {
            this.logger.log("info", "Line " + lineizer.getLineNumber() + ": Too few tokens; expected " + varNames.size() + " tokens but got " + (col + 1) + " tokens.");
        }

        if (col > varNames.size() - 1) {
            this.logger.log("info", "Line " + lineizer.getLineNumber() + ": Too many tokens; expected " + varNames.size() + " tokens but got " + (col + 1) + " tokens.");
        }
    }

    this.logger.log("info", "\nNumber of data rows = " + (row + 1));
    int numRows = row + 1;

    // Convert these range values into variable definitions.
    List<Node> variables = new ArrayList<>();

    VARNAMES:
    for (int i = 0; i < varNames.size(); i++) {
        Set<String> strings = dataStrings.get(i);

        // Use known variables if they exist for the corresponding name.
        for (Node variable : knownVariables) {
            if (variable.getName().equals(varNames.get(i))) {
                variables.add(variable);
                continue VARNAMES;
            }
        }

        // Real-valued columns become continuous variables; their names are
        // used as given, without the legality check applied below.
        if (isDouble(strings) && !isIntegral(strings) && i != idIndex) {
            variables.add(new ContinuousVariable(varNames.get(i)));
            continue;
        }

        // Every remaining column gets a sanitized, validated name.
        String name = varNames.get(i);

        if (name.contains(" ")) {
            name = name.replace(' ', '_');
            varNames.set(i, name);
        }

        if (!NamingProtocol.isLegalName(name)) {
            throw new IllegalArgumentException("Line " + lineizer.getLineNumber() + ": This cannot be used as a variable name: " + name + ".");
        }

        if (isIntegral(strings) && tooManyDiscreteValues(strings) && i != idIndex) {
            // Integer-valued, but with too many distinct values to be
            // treated as categories.
            variables.add(new ContinuousVariable(name));
        } else {
            List<String> categories = new ArrayList<>(strings);
            categories.remove(null);
            categories.remove("");
            categories.remove(missingValueMarker);

            // Categories are ordered lexicographically, not numerically.
            Collections.sort(categories);

            variables.add(new DiscreteVariable(name, categories));
        }
    }

    // Log the type chosen for each variable, skipping the ID column.
    for (int i = 0; i < varNames.size(); i++) {
        if (i == idIndex) {
            continue;
        }

        Node node = variables.get(i);

        if (node instanceof ContinuousVariable) {
            this.logger.log("info", node + " --> Continuous");
        } else if (node instanceof DiscreteVariable) {
            StringBuilder buf = new StringBuilder();
            buf.append(node).append(" --> <");
            List<String> categories = ((DiscreteVariable) node).getCategories();

            for (int j = 0; j < categories.size(); j++) {
                buf.append(categories.get(j));

                if (j < categories.size() - 1) {
                    buf.append(", ");
                }
            }

            buf.append(">");
            this.logger.log("info", buf.toString());
        }
    }

    return new DataSetDescription(variables, numRows, idIndex, variableSectionIncluded, delimiter);
}
use of edu.cmu.tetrad.graph.Node in project tetrad by cmu-phil.
the class DataReader method setValue.
/**
 * Stores one parsed token in the given cell of the data set.
 *
 * <p>Null or empty tokens, tokens that equal the missing value marker after
 * trimming, and tokens addressed to a column beyond the data set's last
 * column are ignored. For a continuous column the token is parsed as a
 * double, with NaN stored when it does not parse. For a discrete column the
 * index of the trimmed token in the variable's category list is stored, or
 * -99 when the token is not a category. Columns of any other variable type
 * are left untouched.
 *
 * @param dataSet The data set to write into.
 * @param row The row of the cell.
 * @param col The column of the cell.
 * @param s The raw token read for the cell.
 */
private void setValue(DataSet dataSet, int row, int col, String s) {
    boolean blank = s == null || s.isEmpty();

    if (blank || s.trim().equals(missingValueMarker)) {
        return;
    }

    if (col >= dataSet.getNumColumns()) {
        return;
    }

    Node variable = dataSet.getVariable(col);

    if (variable instanceof ContinuousVariable) {
        double parsed;

        try {
            parsed = Double.parseDouble(s);
        } catch (NumberFormatException e) {
            // Unparseable tokens are recorded as NaN.
            parsed = Double.NaN;
        }

        dataSet.setDouble(row, col, parsed);
        return;
    }

    if (variable instanceof DiscreteVariable) {
        List<String> categories = ((DiscreteVariable) variable).getCategories();
        int index = categories.indexOf(s.trim());
        dataSet.setInt(row, col, index == -1 ? -99 : index);
    }
}
use of edu.cmu.tetrad.graph.Node in project tetrad by cmu-phil.
the class DataWriter method writeRectangularData.
/**
 * Writes a dataset to file. The dataset may have continuous and/or discrete
 * columns. Note that <code>out</code> is not closed by this method, so
 * the close method on <code>out</code> will need to be called externally.
 * The writer is flushed before this method returns.
 *
 * <p>The first line holds the variable names. Each following line holds one
 * row: continuous values are formatted with the shared number format and
 * written as "*" when missing; discrete values are written as the string
 * form of the stored object, or as an empty field when that object is null.
 * Columns of any other variable type produce no output, not even a
 * separator.
 *
 * @param dataSet The data set to save.
 * @param out The writer to write the output to.
 * @param separator The character separating fields, usually '\t' or ','.
 * @throws IOException If there is some problem dealing with the writer.
 */
public static void writeRectangularData(DataSet dataSet, Writer out, char separator) throws IOException {
    NumberFormat nf = NumberFormatUtil.getInstance().getNumberFormat();
    int numColumns = dataSet.getNumColumns();
    int lastColumn = numColumns - 1;

    StringBuilder buf = new StringBuilder();

    // Header line with the variable names.
    for (int col = 0; col < numColumns; col++) {
        String name = dataSet.getVariable(col).getName();

        if (name.trim().equals("")) {
            // NOTE(review): this yields "C-1" for the first column; confirm
            // whether (col + 1) was intended before changing it.
            name = "C" + (col - 1);
        }

        buf.append(name);

        if (col < lastColumn) {
            buf.append(separator);
        }
    }

    // One line per data row.
    for (int row = 0; row < dataSet.getNumRows(); row++) {
        buf.append("\n");

        for (int col = 0; col < numColumns; col++) {
            Node variable = dataSet.getVariable(col);

            if (variable instanceof ContinuousVariable) {
                double value = dataSet.getDouble(row, col);

                if (ContinuousVariable.isDoubleMissingValue(value)) {
                    buf.append("*");
                } else {
                    buf.append(nf.format(value));
                }

                if (col < lastColumn) {
                    buf.append(separator);
                }
            } else if (variable instanceof DiscreteVariable) {
                Object obj = dataSet.getObject(row, col);
                buf.append(obj == null ? "" : obj.toString());

                if (col < lastColumn) {
                    buf.append(separator);
                }
            }
        }
    }

    buf.append("\n");
    out.write(buf.toString());

    // Flush rather than close: the caller owns the writer and, as
    // documented above, is responsible for closing it.
    out.flush();
}
use of edu.cmu.tetrad.graph.Node in project tetrad by cmu-phil.
the class Discretizer method discretize.
/**
 * Builds a new data set in which every source variable that has a
 * discretization spec (matched by name) is replaced by a discrete variable
 * carrying the spec's categories.
 *
 * <p>Continuous variables whose spec uses the NONE method are carried over
 * unchanged. Variables without a spec are carried over only when
 * {@link #isVariablesCopied()} is true; otherwise they are dropped, which
 * means column indices in the new data set can differ from those in the
 * source data set.
 *
 * @return - Discretized dataset.
 */
public DataSet discretize() {
    // build list of variables
    List<Node> variables = new LinkedList<>();
    // Maps each replacement variable to the spec key it was created from.
    Map<Node, Node> replacementMapping = new HashMap<>();
    // Maps each replacement variable to its column in the SOURCE data set.
    // The new data set may have fewer columns, so its indices cannot be
    // used to read the source data.
    Map<Node, Integer> sourceColumns = new HashMap<>();

    for (int i = 0; i < sourceDataSet.getNumColumns(); i++) {
        Node variable = sourceDataSet.getVariable(i);

        if (variable instanceof ContinuousVariable) {
            Node specNode = findSpecNode(variable.getName());
            ContinuousDiscretizationSpec spec = specNode == null
                    ? null : (ContinuousDiscretizationSpec) specs.get(specNode);

            if (spec != null) {
                if (spec.getMethod() == ContinuousDiscretizationSpec.NONE) {
                    variables.add(variable);
                } else {
                    List<String> cats = spec.getCategories();
                    DiscreteVariable var = new DiscreteVariable(variable.getName(), cats);
                    replacementMapping.put(var, specNode);
                    sourceColumns.put(var, i);
                    variables.add(var);
                }
            } else if (isVariablesCopied()) {
                variables.add(variable);
            }
        } else if (variable instanceof DiscreteVariable) {
            Node specNode = findSpecNode(variable.getName());
            DiscreteDiscretizationSpec spec = specNode == null
                    ? null : (DiscreteDiscretizationSpec) specs.get(specNode);

            if (spec != null) {
                List<String> cats = spec.getCategories();
                DiscreteVariable var = new DiscreteVariable(specNode.getName(), cats);
                replacementMapping.put(var, specNode);
                sourceColumns.put(var, i);
                variables.add(var);
            } else if (isVariablesCopied()) {
                variables.add(variable);
            }
        } else if (isVariablesCopied()) {
            variables.add(variable);
        }
    }

    // build new dataset.
    ColtDataSet newDataSet = new ColtDataSet(sourceDataSet.getNumRows(), variables);

    for (int i = 0; i < newDataSet.getNumColumns(); i++) {
        Node variable = newDataSet.getVariable(i);
        Node sourceVar = replacementMapping.get(variable);

        // Variables that were not replaced are copied over as they are.
        if (sourceVar == null || !specs.containsKey(sourceVar)) {
            DataUtils.copyColumn(variable, sourceDataSet, newDataSet);
            continue;
        }

        // Read from the column the variable had in the source data set,
        // and write to the column it has in the new one.
        int sourceCol = sourceColumns.get(variable);
        int targetCol = newDataSet.getColumn(variable);
        int numRows = sourceDataSet.getNumRows();

        if (sourceVar instanceof ContinuousVariable) {
            ContinuousDiscretizationSpec spec = (ContinuousDiscretizationSpec) specs.get(sourceVar);
            double[] breakpoints = spec.getBreakpoints();
            List<String> categories = spec.getCategories();
            String name = variable.getName();

            double[] sourceValues = new double[numRows];

            for (int j = 0; j < numRows; j++) {
                sourceValues[j] = sourceDataSet.getDouble(j, sourceCol);
            }

            Discretization discretization = discretize(sourceValues, breakpoints, name, categories);
            int[] discretized = discretization.getData();

            for (int j = 0; j < discretized.length; j++) {
                newDataSet.setInt(j, targetCol, discretized[j]);
            }
        } else if (sourceVar instanceof DiscreteVariable) {
            DiscreteDiscretizationSpec spec = (DiscreteDiscretizationSpec) specs.get(sourceVar);
            int[] remap = spec.getRemap();

            // NOTE(review): a source value outside the remap range (for
            // example a negative missing-value code) would throw here, as
            // it did before -- confirm such values cannot occur.
            for (int j = 0; j < numRows; j++) {
                newDataSet.setInt(j, targetCol, remap[sourceDataSet.getInt(j, sourceCol)]);
            }
        }
    }

    return newDataSet;
}

/** Returns the first key of <code>specs</code> whose name equals the given name, or null if there is none. */
private Node findSpecNode(String name) {
    for (Node node : specs.keySet()) {
        if (node.getName().equals(name)) {
            return node;
        }
    }

    return null;
}
Aggregations