Search in sources :

Example 96 with Node

use of edu.cmu.tetrad.graph.Node in project tetrad by cmu-phil.

the class TimeSeriesSemSimulation method topToBottomLayout.

/**
 * Arranges the nodes of a time lag graph on a regular grid.
 *
 * The lag-0 nodes are ordered by their current center X coordinate and each
 * one is assigned a column (x starts at 65 and advances by 100 per column).
 * Within a column, the node for every lag from {@code graph.getMaxLag()} down
 * to 0 is placed in turn at increasing y (starting at 50, advancing by 100),
 * so the largest lag gets the smallest y and lag 0 the largest.
 *
 * Note that the list returned by {@code graph.getLag0Nodes()} is sorted in
 * place.
 *
 * @param graph the time lag graph whose node centers are rewritten.
 */
public static void topToBottomLayout(TimeLagGraph graph) {
    int xStart = 65;
    int yStart = 50;
    int xSpace = 100;
    int ySpace = 100;
    List<Node> lag0Nodes = graph.getLag0Nodes();
    Collections.sort(lag0Nodes, new Comparator<Node>() {

        @Override
        public int compare(Node o1, Node o2) {
            // Integer.compare avoids the overflow that plain subtraction
            // can produce for coordinates of opposite sign and large magnitude.
            return Integer.compare(o1.getCenterX(), o2.getCenterX());
        }
    });
    // Start one step before the first column/row so that the increment at the
    // top of each loop iteration lands on the intended coordinate.
    int x = xStart - xSpace;
    for (Node node : lag0Nodes) {
        x += xSpace;
        int y = yStart - ySpace;
        TimeLagGraph.NodeId id = graph.getNodeId(node);
        for (int lag = graph.getMaxLag(); lag >= 0; lag--) {
            // y advances even when the node is missing, leaving a gap in the grid.
            y += ySpace;
            Node _node = graph.getNode(id.getName(), lag);
            if (_node == null) {
                // Report what was looked up; the node reference itself is null here.
                System.out.println("Couldn't find " + id.getName() + " at lag " + lag);
                continue;
            }
            _node.setCenterX(x);
            _node.setCenterY(y);
        }
    }
}
Also used : Node(edu.cmu.tetrad.graph.Node) TimeLagGraph(edu.cmu.tetrad.graph.TimeLagGraph)

Example 97 with Node

use of edu.cmu.tetrad.graph.Node in project tetrad by cmu-phil.

the class DataReader method scanForDescription.

/**
 * Scans the file for variable definitions and number of cases.
 *
 * Every remaining line (up to a line starting with "/knowledge") is
 * tokenized, and the distinct non-empty, non-missing tokens seen in each
 * column are collected. Each column is then turned into a variable: a known
 * variable with the same name if one exists, otherwise a continuous or a
 * discrete variable depending on the collected values.
 *
 * @param varNames Names of variables, one per column. This method
 * dereferences the list immediately, so it must not be null. Entries that
 * contain spaces are rewritten in place with underscores for columns that do
 * not match a known variable and are not classified as non-integral doubles.
 * @param lineizer Parses lines, skipping comments.
 * @param delimiter Delimiter to tokenize tokens in each row.
 * @param firstLine Non-null if a non-variable first line had to be lineized
 * @param idIndex The index of the ID column.
 * @param variableSectionIncluded Passed through unchanged to the returned
 * description.
 * @return A description holding the variables, the number of rows scanned,
 * the ID column index, the variable-section flag and the delimiter.
 * @throws IllegalArgumentException if a column name is not a legal variable
 * name after spaces have been replaced.
 */
private DataSetDescription scanForDescription(List<String> varNames, Lineizer lineizer, Pattern delimiter, String firstLine, int idIndex, boolean variableSectionIncluded) {
    // Scan file, collecting up the set of range values for each variables.
    List<Set<String>> dataStrings = new ArrayList<>();
    for (int i = 0; i < varNames.size(); i++) {
        dataStrings.add(new HashSet<String>(varNames.size()));
    }
    int row = -1;
    // NOTE(review): when firstLine is non-null but the lineizer has no further
    // lines, this loop never runs and firstLine is not scanned. Left as is
    // because the code that later reads the data is not visible here and may
    // rely on the same row count - confirm before changing.
    while (lineizer.hasMoreLines()) {
        String line;
        if (firstLine == null) {
            line = lineizer.nextLine();
        } else {
            // Consume the line that was already read by the caller exactly once.
            line = firstLine;
            firstLine = null;
        }
        if (line.startsWith("/knowledge")) {
            break;
        }
        ++row;
        RegexTokenizer tokenizer = new RegexTokenizer(line, delimiter, quoteChar);
        int col = -1;
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            ++col;
            // Extra tokens beyond the declared columns are counted but not stored.
            if (col >= dataStrings.size()) {
                continue;
            }
            if ("".equals(token) || missingValueMarker.equals(token)) {
                continue;
            }
            dataStrings.get(col).add(token);
        }
        if (col < varNames.size() - 1) {
            this.logger.log("info", "Line " + lineizer.getLineNumber() + ": Too few tokens; expected " + varNames.size() + " tokens but got " + (col + 1) + " tokens.");
        }
        if (col > varNames.size() - 1) {
            this.logger.log("info", "Line " + lineizer.getLineNumber() + ": Too many tokens; expected " + varNames.size() + " tokens but got " + (col + 1) + " tokens.");
        }
    }
    this.logger.log("info", "\nNumber of data rows = " + (row + 1));
    int numRows = row + 1;
    // Convert these range values into variable definitions.
    List<Node> variables = new ArrayList<>();
    VARNAMES: for (int i = 0; i < varNames.size(); i++) {
        Set<String> strings = dataStrings.get(i);
        // Use known variables if they exist for the corresponding name.
        for (Node variable : knownVariables) {
            if (variable.getName().equals(varNames.get(i))) {
                variables.add(variable);
                continue VARNAMES;
            }
        }
        if (isDouble(strings) && !isIntegral(strings) && i != idIndex) {
            // Non-integral numeric column: continuous, name used as given.
            variables.add(new ContinuousVariable(varNames.get(i)));
            continue;
        }
        // Integral column with too many distinct values is also treated as
        // continuous; everything else (including the ID column) is discrete.
        boolean integralButContinuous = isIntegral(strings) && tooManyDiscreteValues(strings) && i != idIndex;
        String name = varNames.get(i);
        if (name.contains(" ")) {
            name = name.replaceAll(" ", "_");
            varNames.set(i, name);
        }
        if (!NamingProtocol.isLegalName(name)) {
            throw new IllegalArgumentException("Line " + lineizer.getLineNumber() + ": This cannot be used as a variable name: " + name + ".");
        }
        if (integralButContinuous) {
            variables.add(new ContinuousVariable(name));
        } else {
            List<String> categories = new LinkedList<>(strings);
            categories.remove(null);
            categories.remove("");
            categories.remove(missingValueMarker);
            // Categories are ordered lexicographically (String natural order).
            Collections.sort(categories);
            variables.add(new DiscreteVariable(name, categories));
        }
    }
    // Log the type chosen for every column except the ID column.
    for (int i = 0; i < varNames.size(); i++) {
        if (i == idIndex) {
            continue;
        }
        Node node = variables.get(i);
        if (node instanceof ContinuousVariable) {
            this.logger.log("info", node + " --> Continuous");
        } else if (node instanceof DiscreteVariable) {
            StringBuilder buf = new StringBuilder();
            buf.append(node).append(" --> <");
            List<String> categories = ((DiscreteVariable) node).getCategories();
            for (int j = 0; j < categories.size(); j++) {
                buf.append(categories.get(j));
                if (j < categories.size() - 1) {
                    buf.append(", ");
                }
            }
            buf.append(">");
            this.logger.log("info", buf.toString());
        }
    }
    return new DataSetDescription(variables, numRows, idIndex, variableSectionIncluded, delimiter);
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) Node(edu.cmu.tetrad.graph.Node) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList)

Example 98 with Node

use of edu.cmu.tetrad.graph.Node in project tetrad by cmu-phil.

the class DataReader method setValue.

/**
 * Writes one parsed token into a cell of the data set.
 *
 * Nothing is written when the token is null, empty, or (after trimming) equal
 * to the missing value marker, or when the column index lies beyond the data
 * set's columns. For a continuous column the token is parsed as a double and
 * NaN is stored if parsing fails. For a discrete column the index of the
 * trimmed token among the variable's categories is stored, or -99 when the
 * token is not one of the categories.
 *
 * @param dataSet the data set to write into.
 * @param row the row index of the cell.
 * @param col the column index of the cell.
 * @param s the raw token read for this cell; may be null.
 */
private void setValue(DataSet dataSet, int row, int col, String s) {
    final boolean noToken = (s == null) || s.isEmpty();
    if (noToken || s.trim().equals(missingValueMarker)) {
        return;
    }
    if (dataSet.getNumColumns() <= col) {
        return;
    }

    final Node variable = dataSet.getVariable(col);

    if (variable instanceof ContinuousVariable) {
        try {
            dataSet.setDouble(row, col, Double.parseDouble(s));
        } catch (NumberFormatException e) {
            // Unparseable number: record it as NaN rather than failing the read.
            dataSet.setDouble(row, col, Double.NaN);
        }
        return;
    }

    if (variable instanceof DiscreteVariable) {
        final List<String> categories = ((DiscreteVariable) variable).getCategories();
        final int categoryIndex = categories.indexOf(s.trim());
        // -99 is stored for tokens that are not among the known categories.
        dataSet.setInt(row, col, categoryIndex == -1 ? -99 : categoryIndex);
    }
}
Also used : Node(edu.cmu.tetrad.graph.Node)

Example 99 with Node

use of edu.cmu.tetrad.graph.Node in project tetrad by cmu-phil.

the class DataWriter method writeRectangularData.

/**
 * Writes a dataset to file. The dataset may have continuous and/or discrete
 * columns. The output is a header row of variable names followed by one row
 * per case, with fields separated by <code>separator</code> and a trailing
 * newline after the last row. Missing continuous values are written as "*".
 *
 * Note that <code>out</code> IS closed by this method once the data has been
 * written, so callers must not write to it afterwards.
 *
 * @param dataSet   The data set to save.
 * @param out       The writer to write the output to; closed on return.
 * @param separator The character separating fields, usually '\t' or ','.
 * @throws IOException If there is some problem dealing with the writer.
 */
public static void writeRectangularData(DataSet dataSet, Writer out, char separator) throws IOException {
    NumberFormat nf = NumberFormatUtil.getInstance().getNumberFormat();
    StringBuilder buf = new StringBuilder();
    int numColumns = dataSet.getNumColumns();
    for (int col = 0; col < numColumns; col++) {
        String name = dataSet.getVariable(col).getName();
        if (name.trim().equals("")) {
            // Blank names get a 1-based placeholder (C1, C2, ...); the previous
            // "col - 1" produced "C-1" for the first column.
            name = "C" + (col + 1);
        }
        buf.append(name);
        if (col < numColumns - 1) {
            buf.append(separator);
        }
    }
    for (int row = 0; row < dataSet.getNumRows(); row++) {
        buf.append("\n");
        for (int col = 0; col < numColumns; col++) {
            Node variable = dataSet.getVariable(col);
            if (variable instanceof ContinuousVariable) {
                double value = dataSet.getDouble(row, col);
                if (ContinuousVariable.isDoubleMissingValue(value)) {
                    buf.append("*");
                } else {
                    buf.append(nf.format(value));
                }
            } else if (variable instanceof DiscreteVariable) {
                Object obj = dataSet.getObject(row, col);
                String val = ((obj == null) ? "" : obj.toString());
                buf.append(val);
            }
            // The separator is written for every column, including one of an
            // unrecognised variable type (which contributes an empty field),
            // so data rows always line up with the header row.
            if (col < numColumns - 1) {
                buf.append(separator);
            }
        }
    }
    buf.append("\n");
    out.write(buf.toString());
    out.close();
}
Also used : Node(edu.cmu.tetrad.graph.Node) NumberFormat(java.text.NumberFormat)

Example 100 with Node

use of edu.cmu.tetrad.graph.Node in project tetrad by cmu-phil.

the class Discretizer method discretize.

/**
 * Builds a discretized copy of the source data set.
 *
 * For each source column, a discretization spec is looked up by variable
 * name among the keys of <code>specs</code>. A continuous column with a spec
 * whose method is not NONE, and a discrete column with a spec, is replaced by
 * a new discrete variable carrying the spec's categories. Columns without a
 * spec are kept only if <code>isVariablesCopied()</code> is true. Replaced
 * columns are filled by applying the spec's breakpoints (continuous) or remap
 * table (discrete) to the source values; all other columns are copied.
 *
 * @return - Discretized dataset.
 */
public DataSet discretize() {
    // build list of variables
    List<Node> variables = new LinkedList<>();
    Map<Node, Node> replacementMapping = new HashMap<>();
    // Source column index for every replacement variable. The new data set may
    // omit source columns, so its column indices cannot be used to read the
    // source data.
    Map<Node, Integer> sourceColumns = new HashMap<>();
    for (int i = 0; i < sourceDataSet.getNumColumns(); i++) {
        Node variable = sourceDataSet.getVariable(i);
        if (variable instanceof ContinuousVariable) {
            ContinuousDiscretizationSpec spec = null;
            Node _node = null;
            // Specs are matched by variable name, not by node identity.
            for (Node node : specs.keySet()) {
                if (node.getName().equals(variable.getName())) {
                    DiscretizationSpec _spec = specs.get(node);
                    spec = (ContinuousDiscretizationSpec) _spec;
                    _node = node;
                    break;
                }
            }
            if (spec != null) {
                if (spec.getMethod() == ContinuousDiscretizationSpec.NONE) {
                    // Explicitly not discretized: keep the original variable.
                    variables.add(variable);
                } else {
                    List<String> cats = spec.getCategories();
                    DiscreteVariable var = new DiscreteVariable(variable.getName(), cats);
                    replacementMapping.put(var, _node);
                    sourceColumns.put(var, i);
                    variables.add(var);
                }
            } else if (isVariablesCopied()) {
                variables.add(variable);
            }
        } else if (variable instanceof DiscreteVariable) {
            DiscreteDiscretizationSpec spec = null;
            Node _node = null;
            for (Node node : specs.keySet()) {
                if (node.getName().equals(variable.getName())) {
                    DiscretizationSpec _spec = specs.get(node);
                    spec = (DiscreteDiscretizationSpec) _spec;
                    _node = node;
                    break;
                }
            }
            if (spec != null) {
                List<String> cats = spec.getCategories();
                DiscreteVariable var = new DiscreteVariable(_node.getName(), cats);
                replacementMapping.put(var, _node);
                sourceColumns.put(var, i);
                variables.add(var);
            } else if (isVariablesCopied()) {
                variables.add(variable);
            }
        } else if (isVariablesCopied()) {
            variables.add(variable);
        }
    }
    // build new dataset.
    ColtDataSet newDataSet = new ColtDataSet(sourceDataSet.getNumRows(), variables);
    for (int i = 0; i < newDataSet.getNumColumns(); i++) {
        Node variable = newDataSet.getVariable(i);
        Node sourceVar = replacementMapping.get(variable);
        if (sourceVar != null && specs.containsKey(sourceVar)) {
            // Read from the recorded source column, write to the new column.
            int sourceCol = sourceColumns.get(variable);
            int _col = newDataSet.getColumn(variable);
            if (sourceVar instanceof ContinuousVariable) {
                ContinuousDiscretizationSpec spec = (ContinuousDiscretizationSpec) specs.get(sourceVar);
                double[] breakpoints = spec.getBreakpoints();
                List<String> categories = spec.getCategories();
                String name = variable.getName();
                double[] trimmedData = new double[newDataSet.getNumRows()];
                for (int j = 0; j < sourceDataSet.getNumRows(); j++) {
                    trimmedData[j] = sourceDataSet.getDouble(j, sourceCol);
                }
                Discretization discretization = discretize(trimmedData, breakpoints, name, categories);
                int[] _data = discretization.getData();
                for (int j = 0; j < _data.length; j++) {
                    newDataSet.setInt(j, _col, _data[j]);
                }
            } else if (sourceVar instanceof DiscreteVariable) {
                DiscreteDiscretizationSpec spec = (DiscreteDiscretizationSpec) specs.get(sourceVar);
                // remap[k] is the new category index for source category index k.
                int[] remap = spec.getRemap();
                int[] trimmedData = new int[newDataSet.getNumRows()];
                for (int j = 0; j < sourceDataSet.getNumRows(); j++) {
                    trimmedData[j] = sourceDataSet.getInt(j, sourceCol);
                }
                for (int j = 0; j < trimmedData.length; j++) {
                    newDataSet.setInt(j, _col, remap[trimmedData[j]]);
                }
            }
        } else {
            DataUtils.copyColumn(variable, sourceDataSet, newDataSet);
        }
    }
    return newDataSet;
}
Also used : Node(edu.cmu.tetrad.graph.Node)

Aggregations

Node (edu.cmu.tetrad.graph.Node)674 ArrayList (java.util.ArrayList)129 Graph (edu.cmu.tetrad.graph.Graph)106 GraphNode (edu.cmu.tetrad.graph.GraphNode)64 DataSet (edu.cmu.tetrad.data.DataSet)59 LinkedList (java.util.LinkedList)55 ContinuousVariable (edu.cmu.tetrad.data.ContinuousVariable)48 Test (org.junit.Test)48 EdgeListGraph (edu.cmu.tetrad.graph.EdgeListGraph)46 List (java.util.List)45 Dag (edu.cmu.tetrad.graph.Dag)41 TetradMatrix (edu.cmu.tetrad.util.TetradMatrix)41 DiscreteVariable (edu.cmu.tetrad.data.DiscreteVariable)40 ChoiceGenerator (edu.cmu.tetrad.util.ChoiceGenerator)37 Endpoint (edu.cmu.tetrad.graph.Endpoint)29 DisplayNode (edu.cmu.tetradapp.workbench.DisplayNode)26 ColtDataSet (edu.cmu.tetrad.data.ColtDataSet)25 Edge (edu.cmu.tetrad.graph.Edge)23 SemIm (edu.cmu.tetrad.sem.SemIm)19 DepthChoiceGenerator (edu.cmu.tetrad.util.DepthChoiceGenerator)19